Compare commits

..

1636 Commits

Author SHA1 Message Date
Eric Blake
121829cb21 usb: Fix build with newer gcc
gcc 7 is pickier about our sources:

hw/usb/bus.c: In function ‘usb_port_location’:
hw/usb/bus.c:410:66: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size between 0 and 15 [-Werror=format-truncation=]
         snprintf(downstream->path, sizeof(downstream->path), "%s.%d",
                                                                  ^~
hw/usb/bus.c:410:9: note: ‘snprintf’ output between 3 and 28 bytes into a destination of size 16
         snprintf(downstream->path, sizeof(downstream->path), "%s.%d",
         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  upstream->path, portnr);
                  ~~~~~~~~~~~~~~~~~~~~~~~

But we know that there are at most 5 levels of USB hubs, with at
most two digits per level; that plus the separating dots means we
use at most 15 bytes (including trailing NUL) of our 16-byte field.
Adding an assertion to show gcc that we checked for truncation is
enough to shut up the false-positive warning.

Inspired by an idea by Dr. David Alan Gilbert <dgilbert@redhat.com>.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170717151334.17954-1-eblake@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-20 10:02:11 +02:00
Peter Maydell
d4e59218ab Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2017-07-18-2' into staging
Merge qcrypto 2017/07/18 v2

# gpg: Signature made Wed 19 Jul 2017 10:11:21 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2017-07-18-2:
  tests: crypto: add hmac speed benchmark support
  tests: crypto: add hash speed benchmark support
  tests: crypto: add cipher speed benchmark support
  crypto: hmac: add af_alg-backend hmac support
  crypto: hash: add afalg-backend hash support
  crypto: cipher: add afalg-backend cipher support
  crypto: introduce some common functions for af_alg backend
  crypto: hmac: add hmac driver framework
  crypto: hmac: introduce qcrypto_hmac_ctx_new for glib-backend
  crypto: hmac: introduce qcrypto_hmac_ctx_new for nettle-backend
  crypto: hmac: introduce qcrypto_hmac_ctx_new for gcrypt-backend
  crypto: hmac: move crypto/hmac.h into include/crypto/
  crypto: hash: add hash driver framework
  crypto: cipher: add cipher driver framework
  crypto: cipher: introduce qcrypto_cipher_ctx_new for builtin-backend
  crypto: cipher: introduce qcrypto_cipher_ctx_new for nettle-backend
  crypto: cipher: introduce qcrypto_cipher_ctx_new for gcrypt-backend
  crypto: cipher: introduce context free function

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 20:45:37 +01:00
Peter Maydell
824dbfb45d Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20170718' into staging
Queued target/alpha patches

# gpg: Signature made Wed 19 Jul 2017 05:42:55 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-axp-20170718:
  target/alpha: Log temp leaks
  target/alpha: Fix temp leak in gen_fbcond
  target/alpha: Fix temp leak in gen_call_pal
  target/alpha: Fix temp leak in gen_mtpr
  target/alpha: Fix temp leak in gen_bcond
  target/alpha: Merge several flag bytes into ENV->FLAGS
  target/alpha: Copy tb->flags into DisasContext
  target/alpha: Remove amask from tb->flags

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 17:51:36 +01:00
Peter Maydell
b3e46a8914 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20170718-tag' into staging
Xen 2017/07/18

# gpg: Signature made Tue 18 Jul 2017 23:18:16 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# gpg:                 aka "Stefano Stabellini <sstabellini@kernel.org>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20170718-tag:
  xen: don't use xenstore to save/restore physmap anymore
  xen/mapcache: introduce xen_replace_cache_entry()
  xen/mapcache: add an ability to create dummy mappings
  xen: move physmap saving into a separate function
  xen-platform: separate unplugging of NVMe disks
  xen_pt_msi.c: Check for xen_host_pci_get_* failures in xen_pt_msix_init()
  hw/xen: Set emu_mask for igd_opregion register

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 16:31:08 +01:00
Peter Maydell
a51568b78e Merge remote-tracking branch 'remotes/aurel/tags/pull-target-sh4-20170718' into staging
Queued target/sh4 patches

# gpg: Signature made Tue 18 Jul 2017 22:44:25 BST
# gpg:                using RSA key 0xBA9C78061DDD8C9B
# gpg: Good signature from "Aurelien Jarno <aurelien@aurel32.net>"
# gpg:                 aka "Aurelien Jarno <aurelien@jarno.fr>"
# gpg:                 aka "Aurelien Jarno <aurel32@debian.org>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 7746 2642 A9EF 94FD 0F77  196D BA9C 7806 1DDD 8C9B

* remotes/aurel/tags/pull-target-sh4-20170718: (31 commits)
  target/sh4: Use tcg_gen_lookup_and_goto_ptr
  target/sh4: Implement fsrra
  target/sh4: Add missing FPSCR.PR == 0 checks
  target/sh4: Implement fpchg
  target/sh4: Introduce CHECK_SH4A
  target/sh4: Introduce CHECK_FPSCR_PR_*
  target/sh4: Tidy misc illegal insn checks
  target/sh4: Unify code for CHECK_FPU_ENABLED
  target/sh4: Unify code for CHECK_PRIVILEGED
  target/sh4: Unify code for CHECK_NOT_DELAY_SLOT
  target/sh4: Simplify 64-bit fp reg-reg move
  target/sh4: Load/store Dr as 64-bit quantities
  target/sh4: Merge DREG into fpr64 routines
  target/sh4: Eliminate unused XREG macro
  target/sh4: Hoist fp register bank selection
  target/sh4: Pass DisasContext to fpr64 routines
  target/sh4: Unify cpu_fregs into FREG
  target/sh4: Hoist register bank selection
  linux-user/sh4: Clean env->flags on signal boundaries
  linux-user/sh4: Notice gUSA regions during signal delivery
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 14:42:18 +01:00
Peter Maydell
6d60e295ef Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Tue 18 Jul 2017 17:11:07 BST
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  tests/ahci-test: Be mean with RAM
  ahci: split public and private interface
  ahci: Isolate public AHCI interface
  ahci: add ahci_get_num_ports

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 13:43:58 +01:00
Peter Maydell
988879b66e Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170718' into staging
migration/next for 20170718

# gpg: Signature made Tue 18 Jul 2017 16:39:33 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170718:
  migration: check global caps for validity
  migration: provide migrate_cap_add()
  migration: provide migrate_caps_check()
  migration: remove check against colo support
  migration: check global params for validity
  migration: provide migrate_params_apply()
  migration: introduce migrate_params_check()
  migration: export capabilities to props
  migration: export parameters to props
  qdev: provide DEFINE_PROP_INT64()
  migration/rdma: Send error during cancelling
  migration/rdma: Safely convert control types
  migration/rdma: Allow cancelling while waiting for wrid
  migration/rdma: fix qemu_rdma_block_for_wrid error paths
  migration: Close file on failed migration load
  migration/rdma: Fix race on source

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 12:30:41 +01:00
Peter Maydell
f1a46e8885 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Tue 18 Jul 2017 14:29:59 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (21 commits)
  qemu-img: Check for backing image if specified during create
  blockdev: move BDRV_O_NO_BACKING option forward
  block/vvfat: Fix compiler warning with gcc 7
  vvfat: initialize memory after allocating it
  vvfat: correctly parse non-ASCII short and long file names
  vvfat: add a constant for bootsector name
  vvfat: add constants for special values of name[0]
  qemu-iotests: Test unplug of -device without drive
  qemu-iotests: Test 'info block'
  scsi-disk: bdrv_attach_dev() for empty CD-ROM
  ide: bdrv_attach_dev() for empty CD-ROM
  block: List anonymous device BBs in query-block
  block/qapi: Use blk_all_next() for query-block
  block: Make blk_all_next() public
  block/qapi: Add qdev device name to query-block
  block: Make blk_get_attached_dev_id() public
  block/vpc.c: Handle write failures in get_image_offset()
  block/vmdk: Report failures in vmdk_read_cid()
  block: remove timer canceling in throttle_config()
  block: add clock_type field to ThrottleGroup
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 10:48:31 +01:00
Longpeng(Mike)
c7a9af4b45 tests: crypto: add hmac speed benchmark support
This patch adds an hmac speed benchmark; it helps us to
measure the performance by using "make check-speed" or
using "./tests/benchmark-crypto-hmac" directly.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
0128cd29ee tests: crypto: add hash speed benchmark support
This patch adds a hash speed benchmark; it helps us to
measure the performance by using "make check-speed" or
using "./tests/benchmark-crypto-hash" directly.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
1efd9d5ed4 tests: crypto: add cipher speed benchmark support
Now we have two qcrypto backends, library-backend and afalg-backend,
but which one is faster? This patch adds a cipher speed benchmark; it
helps us to measure the performance by using "make check-speed" or
using "./tests/benchmark-crypto-cipher" directly.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
42e7e15f99 crypto: hmac: add af_alg-backend hmac support
Adds afalg-backend hmac support: introduces some private APIs
first, and then integrates them into qcrypto_hmac_afalg_driver.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
9a05977348 crypto: hash: add afalg-backend hash support
Adds afalg-backend hash support: introduces some private APIs
first, and then integrates them into qcrypto_hash_afalg_driver.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
25c60df32b crypto: cipher: add afalg-backend cipher support
Adds afalg-backend cipher support: introduces some private APIs
first, and then integrates them into qcrypto_cipher_afalg_driver.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
f0d92b56d8 crypto: introduce some common functions for af_alg backend
The AF_ALG socket family is the userspace interface for linux
crypto API, this patch adds af_alg family support and some common
functions for af_alg backend. It'll be used by afalg-backend crypto
later.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>

Maintainer: modified to report an error if AF_ALG is requested
but cannot be supported

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
14a5a2aef4 crypto: hmac: add hmac driver framework
1) makes the public APIs in hmac-nettle/gcrypt/glib static,
   and rename them with "nettle/gcrypt/glib" prefix.

2) introduces hmac framework, including QCryptoHmacDriver
   and new public APIs.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:05 +01:00
Longpeng(Mike)
d73c04e3ca crypto: hmac: introduce qcrypto_hmac_ctx_new for glib-backend
Extracts qcrypto_hmac_ctx_new() from qcrypto_hmac_new() for
glib-backend impls.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
8c2776d86c crypto: hmac: introduce qcrypto_hmac_ctx_new for nettle-backend
Extracts qcrypto_hmac_ctx_new() from qcrypto_hmac_new() for
nettle-backend impls.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
822d15ded8 crypto: hmac: introduce qcrypto_hmac_ctx_new for gcrypt-backend
1) Fix a handle-leak problem in qcrypto_hmac_new(), didn't free
   ctx->handle if gcry_mac_setkey fails.

2) Extracts qcrypto_hmac_ctx_new() from qcrypto_hmac_new() for
   gcrypt-backend impls.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
9767b75d92 crypto: hmac: move crypto/hmac.h into include/crypto/
Moves crypto/hmac.h into include/crypto/, like cipher.h and hash.h

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
aa8efad903 crypto: hash: add hash driver framework
1) makes the public APIs in hash-nettle/gcrypt/glib static,
   and rename them with "nettle/gcrypt/glib" prefix.

2) introduces hash framework, including QCryptoHashDriver
   and new public APIs.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
75c8007809 crypto: cipher: add cipher driver framework
1) makes the public APIs in cipher-nettle/gcrypt/builtin static,
   and rename them with "nettle/gcrypt/builtin" prefix.

2) introduces cipher framework, including QCryptoCipherDriver
   and new public APIs.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
d962c6266c crypto: cipher: introduce qcrypto_cipher_ctx_new for builtin-backend
Extracts qcrypto_cipher_ctx_new() from qcrypto_cipher_new() for
builtin-backend impls.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
eabe6c58ac crypto: cipher: introduce qcrypto_cipher_ctx_new for nettle-backend
Extracts qcrypto_cipher_ctx_new() from qcrypto_cipher_new() for
nettle-backend impls.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
468fb27154 crypto: cipher: introduce qcrypto_cipher_ctx_new for gcrypt-backend
Extracts qcrypto_cipher_ctx_new() from qcrypto_cipher_new() for
gcrypt-backend impls.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Longpeng(Mike)
cc5eff0186 crypto: cipher: introduce context free function
Refactors the qcrypto_cipher_free(), splits it into two parts. One
is gcrypt/nettle__cipher_free_ctx() to free the special context.

This makes code more clear, what's more, it would be used by the
later patch.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-19 10:11:04 +01:00
Peter Maydell
63cb55783c Merge remote-tracking branch 'remotes/berrange/tags/pull-qio-2017-07-18-1' into staging
Merge I/O 2017/07/18 v1

# gpg: Signature made Tue 18 Jul 2017 11:31:53 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qio-2017-07-18-1:
  io: simplify qio_channel_attach_aio_context

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-19 09:11:38 +01:00
Richard Henderson
8aa5c65fd3 target/alpha: Log temp leaks
Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:42:05 -10:00
Richard Henderson
6a9b110d54 target/alpha: Fix temp leak in gen_fbcond
Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:42:02 -10:00
Richard Henderson
f401c0321f target/alpha: Fix temp leak in gen_call_pal
Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:42:00 -10:00
Richard Henderson
a4535b8e3e target/alpha: Fix temp leak in gen_mtpr
Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:41:58 -10:00
Richard Henderson
22d716c28e target/alpha: Fix temp leak in gen_bcond
Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:41:55 -10:00
Richard Henderson
bcd2625da5 target/alpha: Merge several flag bytes into ENV->FLAGS
The flags are arranged such that we can manipulate them either
as a whole, or as individual bytes.  The computation within
cpu_get_tb_cpu_state is now reduced to a single load and mask.

Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:41:52 -10:00
Richard Henderson
489a0e6410 target/alpha: Copy tb->flags into DisasContext
Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:41:49 -10:00
Richard Henderson
c6d41b363c target/alpha: Remove amask from tb->flags
This value is constant for the cpu and does not need
to be stored within the TB.

Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-18 18:41:46 -10:00
Richard Henderson
ec2eb22ebb target/sh4: Use tcg_gen_lookup_and_goto_ptr
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-28-rth@twiddle.net>
[aurel32: fix whitespace]
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:18 +02:00
Richard Henderson
11b7aa234b target/sh4: Implement fsrra
Signed-off-by: Richard Henderson <rth@twiddle.net>

Message-Id: <20170718200255.31647-27-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:18 +02:00
Richard Henderson
61dedf2af7 target/sh4: Add missing FPSCR.PR == 0 checks
Both frchg and fschg require PR == 0, otherwise undefined_operation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-26-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:18 +02:00
Richard Henderson
907759f997 target/sh4: Implement fpchg
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-25-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:18 +02:00
Richard Henderson
ccae24d498 target/sh4: Introduce CHECK_SH4A
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-24-rth@twiddle.net>
[aurel32: fix conflict]
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:18 +02:00
Richard Henderson
7e9f7ca86f target/sh4: Introduce CHECK_FPSCR_PR_*
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-23-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:18 +02:00
Richard Henderson
93dc9c8964 target/sh4: Tidy misc illegal insn checks
Now that we have a do_illegal label, use goto in order
to self-document the forcing of the exception.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-22-rth@twiddle.net>
[aurel32: fix whitespace issues]
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
dec4f042a1 target/sh4: Unify code for CHECK_FPU_ENABLED
We do not need to emit N copies of raising an exception.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-21-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
6b98213da9 target/sh4: Unify code for CHECK_PRIVILEGED
We do not need to emit N copies of raising an exception.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-20-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
dec16c6ee8 target/sh4: Unify code for CHECK_NOT_DELAY_SLOT
We do not need to emit N copies of raising an exception.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-19-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
bdcb373902 target/sh4: Simplify 64-bit fp reg-reg move
We do not need to form full 64-bit quantities in order to perform
the move.  This reduces code expansion on 64-bit hosts.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-18-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
4d57fa50d5 target/sh4: Load/store Dr as 64-bit quantities
This enforces proper alignment and makes the register update
more natural.  Note that there is a more serious bug fix for
fmov {DX}Rn,@(R0,Rn) to use a store instead of a load.

Signed-off-by: Richard Henderson <rth@twiddle.net>

Message-Id: <20170718200255.31647-17-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
1e0b21d856 target/sh4: Merge DREG into fpr64 routines
Also add a debugging assert that we did signal illegal opc
for odd double-precision registers.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-16-rth@twiddle.net>
[aurel32: fix whitespace issues]
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
0f73753d62 target/sh4: Eliminate unused XREG macro
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-15-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
5c13bad9ec target/sh4: Hoist fp register bank selection
Compute which register bank to use once at the start of translation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-14-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:17 +02:00
Richard Henderson
e5d8053e76 target/sh4: Pass DisasContext to fpr64 routines
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-13-rth@twiddle.net>
[aurel32: fix whitespace issues]
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
7c9f70386d target/sh4: Unify cpu_fregs into FREG
We were treating FREG as an index and REG as a TCGv.
Making FREG return a TCGv is both less confusing and
a step toward cleaner banking of cpu_fregs.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-12-rth@twiddle.net>
[aurel32: fix whitespace issues]
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
3a3bb8d2b5 target/sh4: Hoist register bank selection
Compute which register bank to use once at the start of translation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-11-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
b0e9c51a00 linux-user/sh4: Clean env->flags on signal boundaries
If a signal is delivered during the execution of a delay slot,
or a gUSA region, clear those bits from the environment so that
the signal handler does not start in that same state.

Cleaning the bits on signal return is paranoid good sense.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-10-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
b0e4f0edf5 linux-user/sh4: Notice gUSA regions during signal delivery
We translate gUSA regions atomically in a parallel context.
But in a serial context a gUSA region may be interrupted.
In that case, restart the region as the kernel would.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-9-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
d6a6cffdd3 target/sh4: Recognize common gUSA sequences
For many of the sequences produced by gcc or glibc,
we can translate these as host atomic operations.
Which saves the need to acquire the exclusive lock.

Signed-off-by: Richard Henderson <rth@twiddle.net>

Message-Id: <20170718200255.31647-8-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
4bfa602bc2 target/sh4: Handle user-space atomics
For uniprocessors, SH4 uses optimistic restartable atomic sequences.
Upon an interrupt, a real kernel would simply notice magic values in
the registers and reset the PC to the start of the sequence.

For QEMU, we cannot do this in quite the same way.  Instead, we notice
the normal start of such a sequence (mov #-x,r15), and start a new TB
that can be executed under cpu_exec_step_atomic.

Reported-by: Bruno Haible  <bruno@clisp.org>
LP: https://bugs.launchpad.net/bugs/1701971
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-7-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
1516184d8e target/sh4: Adjust TB_FLAG_PENDING_MOVCA
Don't leave an unused bit after DELAY_SLOT_MASK.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-6-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
ca69176d52 target/sh4: Keep env->flags clean
If we mask off any out-of-band bits before we assign to the
variable, then we don't need to clean it up when reading.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-5-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:16 +02:00
Richard Henderson
e1933d1435 target/sh4: Introduce TB_FLAG_ENVFLAGS_MASK
We'll be putting more things into this bitmask soon.
Let's have a name that covers all possible uses.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-4-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:15 +02:00
Richard Henderson
4448a83606 target/sh4: Consolidate end-of-TB tests
We can fold 3 different tests within the decode loop
into a more accurate computation of max_insns to start.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170718200255.31647-3-rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:15 +02:00
Aurelien Jarno
92f1f83e34 target/sh4: return result of fcmp using TCG
Since the T bit of the SR register is mapped using a TCG global,
it's better to return the value through TCG than writing it directly. This
allows the helpers to be declared with the flag TCG_CALL_NO_WG.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170702202814.27793-5-aurelien@aurel32.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:15 +02:00
Aurelien Jarno
82e8251374 target/sh4: do not use a helper to implement fneg
There is no need to use a helper to flip one bit, just use a TCG xor
instruction instead.

Message-Id: <20170702202814.27793-5-aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:15 +02:00
Aurelien Jarno
801f4dac57 target/sh4: fix FPSCR cause vs flag inversion
The floating-point status/control register contains cause and flag
bits. The cause bits are set to 0 before executing the instruction,
while the flag bits hold the status of the exception generated after
the field was last cleared.

Message-Id: <20170702202814.27793-4-aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:15 +02:00
Aurelien Jarno
fea7d77d3e target/sh4: fix FPU unordered compare
In case of unordered compare, the fcmp instructions should either
trigger an invalid exception (if enabled) or set T=0. The existing code
left it unchanged.

LP: https://bugs.launchpad.net/qemu/+bug/1701821
Reported-by: Bruno Haible <bruno@clisp.org>
Message-Id: <20170702202814.27793-3-aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:15 +02:00
Aurelien Jarno
57f5c1b093 target/sh4: do not check for PR bit for fabs instruction
The SH4 manual is not fully clear about that, but real hardware does not
check for the PR bit, which selects between single and double
precision, for the fabs instruction. This is probably what is meant by
"Same operation is performed regardless of precision."

Remove the check, and at the same time use a TCG instruction instead of
a helper to clear one bit.

LP: https://bugs.launchpad.net/qemu/+bug/1701821
Reported-by: Bruno Haible <bruno@clisp.org>
Message-Id: <20170702202814.27793-2-aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-18 23:39:15 +02:00
Igor Druzhinin
331b5189d7 xen: don't use xenstore to save/restore physmap anymore
If we have a system with xenforeignmemory_map2() implemented
we don't need to save/restore physmap on suspend/restore
anymore. In case we resume a VM without physmap - try to
recreate the physmap during memory region restore phase and
remap map cache entries accordingly. The old code is left
for compatibility reasons.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-18 14:16:52 -07:00
Igor Druzhinin
5ba3d75645 xen/mapcache: introduce xen_replace_cache_entry()
This new call is trying to update a requested map cache entry
according to the changes in the physmap. The call is searching
for the entry, unmaps it and maps again at the same place using
a new guest address. If the mapping is dummy this call will
make it real.

This function makes use of a new xenforeignmemory_map2() call
with an extended interface that was recently introduced in
libxenforeignmemory [1].

[1] https://www.mail-archive.com/xen-devel@lists.xen.org/msg113007.html

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-18 14:16:09 -07:00
Igor Druzhinin
759235653d xen/mapcache: add an ability to create dummy mappings
Dummies are simple anonymous mappings that are placed instead
of regular foreign mappings in certain situations when we need
to postpone the actual mapping but still have to give a
memory region to QEMU to play with.

This is planned to be used for restore on Xen.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-18 14:12:20 -07:00
Igor Druzhinin
697b66d006 xen: move physmap saving into a separate function
Non-functional change.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
2017-07-18 14:12:19 -07:00
Stefano Stabellini
04d6da4ff6 xen-platform: separate unplugging of NVMe disks
Commit 090fa1c8 "add support for unplugging NVMe disks..." extended the
existing disk unplug flag to cover NVMe disks as well as IDE and SCSI.

The recent thread on the xen-devel mailing list [1] has highlighted that
this is not desirable behaviour: PV frontends should be able to distinguish
NVMe disks from other types of disk and should have separate control over
whether they are unplugged.

This patch defines a new bit in the unplug mask for this purpose (see Xen
commit [2]) and also tidies up the definitions of, and improves the
comments regarding, the previously existing bits in the protocol.

[1] https://lists.xen.org/archives/html/xen-devel/2017-03/msg02924.html
[2] http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=1096aa02

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-18 14:12:06 -07:00
Peter Maydell
64c7c1175b xen_pt_msi.c: Check for xen_host_pci_get_* failures in xen_pt_msix_init()
Check the return status of the xen_host_pci_get_* functions we call in
xen_pt_msix_init(), and fail device init if the reads failed rather than
ploughing ahead. (Spotted by Coverity: CID 777338.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-18 13:27:04 -07:00
Xiong Zhang
a19bae42e3 hw/xen: Set emu_mask for igd_opregion register
In igd passthrough environment, guest could only access opregion at the
first bootup time. Once guest shutdown, later guest couldn't access
opregion anymore.
This is because qemu set emulated guest opregion base address to host
register. Later guest get a wrong host opregion base address, and couldn't
access it anymore.

This patch sets emu_mask for igd_opregion register, so guest won't set
guest opregion base address to host.

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-18 13:26:35 -07:00
Peter Maydell
6887dc6700 Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20170718' into staging
s390: add z14 cpu model

- add a CPU model for the IBM z14 which was announced on July 17th 2017
- update linux headers to 4.13-rc0 to get a fix for an ioctl definition

# gpg: Signature made Tue 18 Jul 2017 09:56:24 BST
# gpg:                using RSA key 0x117BBC80B5A61C7C
# gpg: Good signature from "Christian Borntraeger (IBM) <borntraeger@de.ibm.com>"
# Primary key fingerprint: F922 9381 A334 08F9 DBAB  FBCA 117B BC80 B5A6 1C7C

* remotes/borntraeger/tags/s390x-20170718:
  s390x/cpumodel: z14 cpu models
  linux header sync against v4.13-rc1

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 21:13:48 +01:00
Peter Maydell
f9dada2baa Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 18 Jul 2017 05:15:03 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  live-block-ops.txt: Rename, rewrite, and improve it
  bitmaps.md: Convert to rST; move it into 'interop' dir

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 20:29:36 +01:00
Peter Maydell
20df6c7689 Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2017-07-17' into staging
nbd patches for 2017-07-17

- Eric Blake: nbd: Fix iotests failure due to changed client error message
- Eric Blake: [0/2] NBD fixes before softfreeze

# gpg: Signature made Mon 17 Jul 2017 23:12:20 BST
# gpg:                using RSA key 0xA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>"
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>"
# gpg:                 aka "[jpeg image of size 6874]"
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2017-07-17:
  nbd: Fix server reply to NBD_OPT_EXPORT_NAME of older clients
  nbd: Trace client command being sent
  nbd: Fix iotests failure due to changed client error message

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 18:29:35 +01:00
Peter Maydell
ff3351d449 Merge remote-tracking branch 'remotes/rth/tags/pull-s390-20170717' into staging
Queued s390x tcg patches, v2

# gpg: Signature made Mon 17 Jul 2017 22:22:23 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-s390-20170717:
  target/s390x: Fix risbg handling
  target/s390x: Allow to enable "idtes" feature for TCG
  target/s390x: Mark ETF3 and ETF3_ENH facilities as available
  target/s390x: Implement TRTR
  target/s390x: Implement SRSTU
  target/s390x: Tidy SRST
  target/s390x: Implement CONVERT UNICODE insns
  target/s390x: Implement CSST

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 17:54:51 +01:00
Dr. David Alan Gilbert
ff0ca96234 tests/ahci-test: Be mean with RAM
The migration tests used two VMs, each with -m 1024; this caused
problems when run in some small, pessimistic test VMs (netbsd).
We can just be meaner with the amount of RAM in the test and use -m 384.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: 20170714152820.24034-1-dgilbert@redhat.com
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2017-07-18 11:47:57 -04:00
John Snow
70e2337030 ahci: split public and private interface
Complete the split by renaming ahci_public.h --> ahci.h and
moving the current ahci.h to hw/ide/ahci_internal.h.

Adjust ahci_internal.h to now load ahci.h instead of ahci_public.h.

Finalize the split by switching external users to the new header.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170623220926.11479-4-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2017-07-18 11:47:57 -04:00
John Snow
5402fda5ad ahci: Isolate public AHCI interface
Begin separating the public/private interface by removing the minimum
set of information used by code outside of hw/ide/ and calling this
a new ahci_public.h file, which will be renamed to ahci.h in a future
patch.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170623220926.11479-3-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2017-07-18 11:47:57 -04:00
John Snow
bbe3179a13 ahci: add ahci_get_num_ports
Instead of reaching into the PCI state, allow the AHCIDevice to
respond with how many ports it has.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170623220926.11479-2-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2017-07-18 11:47:56 -04:00
Peter Maydell
9d836fc552 Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170717.0' into staging
VFIO fixes 2017-07-17

 - Init re-order to better support hot-add on sPAPR (Alexey Kardashevskiy)

# gpg: Signature made Mon 17 Jul 2017 21:33:01 BST
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-updates-20170717.0:
  vfio-pci, ppc64/spapr: Reorder group-to-container attaching

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 16:43:47 +01:00
Peter Xu
6b19a7d91c migration: check global caps for validity
Checks validity for all the capabilities that we enabled with command
line.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <1500349150-13240-11-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:26 +02:00
Peter Xu
4e4a3d3aa6 migration: provide migrate_cap_add()
Abstracted from migrate_set_block_enabled() to allocate
MigrationCapabilityStatusList properly.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-10-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:26 +02:00
Peter Xu
4a84214ebe migration: provide migrate_caps_check()
Abstract helper function to check migration capabilities (from the old
qmp_migrate_set_capabilities).  Prepare to be used somewhere else.

There is a side effect of this change: when applying the capabilities, we
were skipping the invalid ones, but still applying the valid ones (if
they are provided in the same QMP request). After this refactoring,
we'll ignore all the capabilities if we detected invalid setup along the
way. However, I don't think it is a problem since general users should
not provide anything invalid after all.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-9-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:25 +02:00
Peter Xu
fd198f9002 migration: remove check against colo support
Since commit a15215f3 ("build: remove --enable-colo/--disable-colo"),
colo is always supported. We don't need any colo_supported() now since
it is always true. Removing any extra code that depends on it.

CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Hailiang Zhang <zhang.zhanghailiang@huawei.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-8-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:24 +02:00
Peter Xu
8b0b29dcec migration: check global params for validity
Adding validity check for the migration parameters passed in via global
properties.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <1500349150-13240-7-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:23 +02:00
Peter Xu
476c72aa91 migration: provide migrate_params_apply()
Abstracted from qmp_migrate_set_parameters().

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-6-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:22 +02:00
Peter Xu
16d063bc2a migration: introduce migrate_params_check()
Helper to check the parameters. Abstracted from
qmp_migrate_set_parameters().

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-5-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:21 +02:00
Peter Xu
2081475841 migration: export capabilities to props
Do the same thing to migration capabilities, just like what we did in
previous patch for migration parameters.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-4-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:21 +02:00
Peter Xu
89632fafdc migration: export parameters to props
Export migration parameters to qdev properties. Then we can use, for
example:

  -global migration.x-cpu-throttle-initial=xxx

To specify migration parameters during init.

The prefix "x-" is prepended to each exported parameter to show that this is
not a stable interface, and is only for debugging/testing purposes.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-3-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:20 +02:00
Peter Xu
07d1d063d3 qdev: provide DEFINE_PROP_INT64()
We have nearly all the stuff, but this one is missing. Add it in.

Am going to use this new helper for MigrationParameters fields, since
most of them are int64_t.

CC: Markus Armbruster <armbru@redhat.com>
CC: Eduardo Habkost <ehabkost@redhat.com>
CC: Marc-André Lureau <marcandre.lureau@redhat.com>
CC: Peter Xu <peterx@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1500349150-13240-2-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:19 +02:00
Dr. David Alan Gilbert
32bce19634 migration/rdma: Send error during cancelling
When we issue a cancel and clean up the RDMA channel
send a CONTROL_ERROR to get the destination to quit.

The rdma_cleanup code waits for the event to come back
from the rdma_disconnect; but that won't happen until the
destination quits and there's currently nothing to force
it.

Note this makes the case of a cancel work while the destination
is alive, and it already works if the destination is
truly dead.  Note it doesn't fix the case where the destination
is hung (we get stuck waiting for the rdma_disconnect event).

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20170717110936.23314-7-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:18 +02:00
Dr. David Alan Gilbert
482a33c53c migration/rdma: Safely convert control types
control_desc[] is an array of strings that correspond to a
series of message types; they're used only for error messages, but if
the message type is seriously broken then we could go off the end of
the array.

Convert the array to a function control_desc() that bound checks.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170717110936.23314-6-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:17 +02:00
Dr. David Alan Gilbert
9c98cfbe72 migration/rdma: Allow cancelling while waiting for wrid
When waiting for a WRID, if the other side dies we end up waiting
for ever with no way to cancel the migration.
Cure this by poll()ing the fd first with a timeout and checking
error flags and migration state.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20170717110936.23314-5-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:17 +02:00
Dr. David Alan Gilbert
0b3c15f097 migration/rdma: fix qemu_rdma_block_for_wrid error paths
The two places that 'goto err_block_for_wrid' weren't setting ret
and so would end up returning 0 even though we've failed.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20170717110936.23314-4-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:16 +02:00
Dr. David Alan Gilbert
3a0f2ceaed migration: Close file on failed migration load
Closing the file before exit on a failure allows
the source to cleanup better, especially with RDMA.

Partial fix for https://bugs.launchpad.net/qemu/+bug/1545052

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170717110936.23314-3-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:15 +02:00
Dr. David Alan Gilbert
9cf2bab2ed migration/rdma: Fix race on source
Fix a race where the destination might try and send the source a
WRID_READY before the source has done a post-recv for it.

rdma_post_recv has to happen after the qp exists, and we're
OK since we've already called qemu_rdma_source_init that calls
qemu_alloc_qp.

This corresponds to:
https://bugzilla.redhat.com/show_bug.cgi?id=1285044

The race can be triggered by adding a few ms wait before this
post_recv_control (which was originally due to me turning on loads of
debug).

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20170717110936.23314-2-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-07-18 17:36:14 +02:00
Peter Maydell
368e708b4c Merge remote-tracking branch 'remotes/ehabkost/tags/x86-and-machine-pull-request' into staging
x86 and machine queue, 2017-07-17

# gpg: Signature made Mon 17 Jul 2017 19:46:14 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-and-machine-pull-request:
  qmp: Include parent type on 'qom-list-types' output
  qmp: Include 'abstract' field on 'qom-list-types' output
  tests: Simplify abstract-interfaces check with a helper
  i386: add Skylake-Server cpu model
  i386: Update comment about XSAVES on Skylake-Client
  i386: expose "TCGTCGTCGTCG" in the 0x40000000 CPUID leaf
  fw_cfg: move QOM type defines and fw_cfg types into fw_cfg.h
  fw_cfg: move qdev_init_nofail() from fw_cfg_init1() to callers
  fw_cfg: switch fw_cfg_find() to locate the fw_cfg device by type rather than path
  qom: Fix ambiguous path detection when ambiguous=NULL
  Revert "machine: Convert abstract typename on compat_props to subclass names"
  test-qdev-global-props: Test global property ordering
  qdev: fix the order compat and global properties are applied
  tests: Test case for object_resolve_path*()
  device-crash-test: Fix regexp on whitelist

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 15:24:11 +01:00
John Snow
6e6e55f5c2 qemu-img: Check for backing image if specified during create
Or, rather, force the open of a backing image if one was specified
for creation. Using a similar -unsafe option as rebase, allow qemu-img
to ignore the backing file validation if possible.

It may not always be possible, as in the existing case when a filesize
for the new image was not specified.

This is accomplished by shifting around the conditionals in
bdrv_img_create, such that a backing file is always opened unless we
provide BDRV_O_NO_BACKING. qemu-img is adjusted to pass this new flag
when -u is provided to create.

Sorry for the heinous looking diffstat, but it's mostly whitespace.

Inspired by: https://bugzilla.redhat.com/show_bug.cgi?id=1213786

Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:27:37 +02:00
John Snow
2a32c6e82e blockdev: move BDRV_O_NO_BACKING option forward
For both external_snapshot_prepare and qmp_drive_mirror, we eventually
append the option BDRV_O_NO_BACKING. However, we generally do so after
we create the image.

To accommodate image creation wanting to verify that a backing file
exists or not, add this option prior to create to override checking
the existence of the backing file. This prevents QEMU from trying to
re-open a backing file that's already in use (thanks to qcow2 locking).

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:27:20 +02:00
Max Reitz
7c8730d45f block/vvfat: Fix compiler warning with gcc 7
gcc 7 complains that the sprintf() might write a null byte beyond the
end of the tail buffer.  That is wrong, but we can silence it by making
i unsigned (it can never be negative anyway, see the if condition right
before).  For some reason, this allows gcc to suddenly accurately
calculate the range of i so we can give the tail[] array the exact size
it needs to have (which is 8 bytes) without gcc complaining.

In addition, let us convert the sprintf() to snprintf(), because that is
always nicer, and add an assertion about the range of the return value
afterwards so we can see that "8 - len" will never be negative and thus
"entry->name + MIN(j, 8 - len)" will never be out of bounds.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:36 +02:00
Hervé Poussineau
f80256b7ee vvfat: initialize memory after allocating it
This prevents some host to guest memory content leaks.

Fixes: https://bugs.launchpad.net/qemu/+bug/1599539

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:36 +02:00
Hervé Poussineau
e03da26b71 vvfat: correctly parse non-ASCII short and long file names
Write support works again when image contains non-ASCII names. It is either the
case when user created a non-ASCII filename, or when initial directory contained
a non-ASCII filename (since 0c36111f57)

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:36 +02:00
Hervé Poussineau
63d261cb0d vvfat: add a constant for bootsector name
Also add links to related compatibility problems.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:36 +02:00
Hervé Poussineau
8c4517fd6e vvfat: add constants for special values of name[0]
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
208c38e4e4 qemu-iotests: Test unplug of -device without drive
This caused an assertion failure until recently because the BlockBackend
would be detached on unplug, but was in fact never attached in the first
place. Add a regression test.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
e1824e585f qemu-iotests: Test 'info block'
This test makes sure that all block devices show up on 'info block',
with all of the expected information, in different configurations.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
83b4fe0ed5 scsi-disk: bdrv_attach_dev() for empty CD-ROM
If no drive=... option is passed (for an empty drive), we don't only
lack the BlockBackend normally created by parse_drive(), but we also
need to manually call blk_attach_dev().

This fixes at least a segfault when unplugging such devices, the bug
that they didn't show up in query-block, and probably some more
problems.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
947231ad3b ide: bdrv_attach_dev() for empty CD-ROM
If no drive=... option is passed (for an empty drive), we don't only
lack the BlockBackend normally created by parse_drive(), but we also
need to manually call blk_attach_dev().

IDE does not support hot unplug, but if it did, qdev would take care to
call the matching blk_detach_dev() on unplug.

This fixes at least the bug that such devices didn't show up in
query-block, and probably some more problems.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
ec18b0a93a block: List anonymous device BBs in query-block
Instead of listing only monitor-owned BlockBackends in query-block, also
add those anonymous BlockBackends that are owned by a qdev device and as
such under the control of the user.

This allows using query-block to inspect BlockBackends for the modern
configuration syntax with -blockdev and -device.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
d5b68844e6 block/qapi: Use blk_all_next() for query-block
This patch replaces the blk_next() loop in query-block by a
blk_all_next() one so that we also get access to BlockBackends that
aren't owned by the monitor. For now, the next thing we do is check
whether each BB has a name, so there is no semantic difference.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
a429b9b5f4 block: Make blk_all_next() public
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:36 +02:00
Kevin Wolf
46eade7be8 block/qapi: Add qdev device name to query-block
With -blockdev/-device, users can indirectly create anonymous
BlockBackends, while the state of such backends is still of interest. As
a preparation for making such BBs visible in query-block, make sure that
they can be identified even without a name by adding the ID/QOM path of
their qdev device to BlockInfo.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:35 +02:00
Kevin Wolf
77beef8365 block: Make blk_get_attached_dev_id() public
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-18 15:14:35 +02:00
Peter Maydell
cfc87e00c2 block/vpc.c: Handle write failures in get_image_offset()
Coverity (CID 1355236) points out that get_image_offset() doesn't check that
it actually succeeded in writing the updated block bitmap to the file.
Check the error return from bdrv_pwrite_sync() and propagate an error
response back up to the function which calls get_image_offset() for
a write so that it can return the error to its caller.

get_sector_offset() is only used for reads, but we move it to the
same API for consistency.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:35 +02:00
Peter Maydell
9877860e7b block/vmdk: Report failures in vmdk_read_cid()
The function vmdk_read_cid() can fail if the read on the underlying
block device fails, or if there's a format error in the VMDK file.
However its API doesn't provide a mechanism to report these errors,
and in some cases we were returning a CID of 0 and in some cases a
CID of 0xffffffff, either of which might potentially be valid values.

Change the function to return 0 on success or a negative errno, and
return the CID via a uint32_t* argument. Update the callsites to
handle and propagate the error appropriately.

This fixes in passing a Coverity-spotted issue (CID 1350038) where
we weren't checking the return value from sscanf().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:35 +02:00
Manos Pitsidianakis
27e4cf1303 block: remove timer canceling in throttle_config()
throttle_config() cancels the timers of the calling BlockBackend. This
doesn't make sense because other BlockBackends in the group remain
untouched. There's no need to cancel the timers in the one specific
BlockBackend so let's not do that. Throttled requests will run as
scheduled and future requests will follow the new configuration. This
also allows a throttle group's configuration to be changed even when it
has no members.

Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:35 +02:00
Manos Pitsidianakis
dbe824cc57 block: add clock_type field to ThrottleGroup
Clock type in throttling is currently inferred by the ThrottleTimer's
clock type even though it is a per-ThrottleGroup property; it doesn't
make sense to have different clock types in the same group. Moving this
to a field in ThrottleGroup can simplify some of the throttle functions.

Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-18 15:14:35 +02:00
Kevin Wolf
b1e1fa0c3a commit: Add NULL check for overlay_bs
I can't see how overlay_bs could become NULL with the current code, but
other code in this function already checks it and we can make Coverity
happy with this check, so let's add it.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-18 15:14:35 +02:00
Peter Maydell
e9277a19a1 Merge remote-tracking branch 'remotes/mdroth/tags/qga-pull-2017-07-17-v2-tag' into staging
qemu-ga patch queue

* new command: qemu-get-osinfo
* build fix for OpenBSD
* better error-reporting for failure on keyfile dump
* remove redundant initialization of ga_state global
* include libpcre in w32 package
* w32 localization fixes for service installation/registration

v2:
* fix build issue with older GCCs introduced with guest_get_osinfo
* relocated some declarations in guest_get_osinfo

# gpg: Signature made Tue 18 Jul 2017 11:52:45 BST
# gpg:                using RSA key 0x3353C9CEF108B584
# gpg: Good signature from "Michael Roth <flukshun@gmail.com>"
# gpg:                 aka "Michael Roth <mdroth@utexas.edu>"
# gpg:                 aka "Michael Roth <mdroth@linux.vnet.ibm.com>"
# Primary key fingerprint: CEAC C9E1 5534 EBAB B82D  3FA0 3353 C9CE F108 B584

* remotes/mdroth/tags/qga-pull-2017-07-17-v2-tag:
  test-qga: add test for guest-get-osinfo
  test-qga: pass environemnt to qemu-ga
  qemu-ga: add guest-get-osinfo command
  qga: report error on keyfile dump error
  qga-win32: remove a redundancy code
  qemu-ga: check if utmpx.h is available on the system
  qemu-ga: add missing libpcre to MSI build
  qga-win: fix installation on localized windows

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 14:14:32 +01:00
Peter Maydell
718d7f4f9c Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Mon 17 Jul 2017 16:40:18 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  block: fix shadowed variable in bdrv_co_pdiscard
  util/aio-win32: Only select on what we are actually waiting for

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 13:09:51 +01:00
Tomáš Golembiovský
339ca68bef test-qga: add test for guest-get-osinfo
Add test for guest-get-osinfo command.

Qemu-ga was modified to accept QGA_OS_RELEASE environment variable. If
the variable is defined it is interpreted as path to the os-release file
and it is parsed instead of the default paths.

Signed-off-by: Tomáš Golembiovský <tgolembi@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
* move declarations to beginning of functions
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-18 05:49:01 -05:00
Tomáš Golembiovský
c28afa76c0 test-qga: pass environemnt to qemu-ga
Modify fixture_setup() to pass environment variables to spawned qemu-ga
instance.

Signed-off-by: Tomáš Golembiovský <tgolembi@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-18 05:49:01 -05:00
Tomáš Golembiovský
9848f79740 qemu-ga: add guest-get-osinfo command
Add a new 'guest-get-osinfo' command for reporting basic information of
the guest operating system. This includes machine architecture,
version and release of the kernel and several fields from os-release
file if it is present (as defined in [1]).

[1] https://www.freedesktop.org/software/systemd/man/os-release.html

Signed-off-by: Vinzenz Feenstra <vfeenstr@redhat.com>
Signed-off-by: Tomáš Golembiovský <tgolembi@redhat.com>
* moved declarations to beginning of functions
* dropped unnecessary initialization of struct utsname
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-18 05:48:57 -05:00
Peter Maydell
ed6458726a Merge remote-tracking branch 'remotes/aurel/tags/pull-target-mips-20170717' into staging
Queued target/mips patches

# gpg: Signature made Mon 17 Jul 2017 15:50:27 BST
# gpg:                using RSA key 0xBA9C78061DDD8C9B
# gpg: Good signature from "Aurelien Jarno <aurelien@aurel32.net>"
# gpg:                 aka "Aurelien Jarno <aurelien@jarno.fr>"
# gpg:                 aka "Aurelien Jarno <aurel32@debian.org>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 7746 2642 A9EF 94FD 0F77  196D BA9C 7806 1DDD 8C9B

* remotes/aurel/tags/pull-target-mips-20170717:
  target/mips: optimize WSBH, DSBH and DSHD
  mips: set CP0 Debug DExcCode for SDBBP instruction

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 11:41:03 +01:00
Peter Maydell
6c4591566d Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170717' into staging
target-arm queue:
 * new model of the ARM MPS2/MPS2+ FPGA based development board
 * clean up DISAS_* exit conditions and fix various regressions
   since commits e75449a346 8a6b28c7b5 (in particular including
   ones which broke OP-TEE guests)
 * make Cortex-M3 and M4 correctly default to 8 PMSA regions

# gpg: Signature made Mon 17 Jul 2017 13:43:45 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170717:
  MAINTAINERS: Add entries for MPS2 board
  hw/arm/mps2: Add ethernet
  hw/arm/mps2: Add SCC
  hw/misc/mps2_scc: Implement MPS2 Serial Communication Controller
  hw/arm/mps2: Add timers
  hw/char/cmsdk-apb-timer: Implement CMSDK APB timer device
  hw/arm/mps2: Add UARTs
  hw/char/cmsdk-apb-uart.c: Implement CMSDK APB UART
  hw/arm/mps2: Implement skeleton mps2-an385 and mps2-an511 board models
  target/arm: use DISAS_EXIT for eret handling
  target/arm: use gen_goto_tb for ISB handling
  target/arm/translate: ensure gen_goto_tb sets exit flags
  target/arm/translate.h: expand comment on DISAS_EXIT
  target/arm/translate: make DISAS_UPDATE match declared semantics
  include/exec/exec-all: document common exit conditions
  target/arm: Make Cortex-M3 and M4 default to 8 PMSA regions
  qdev: support properties which don't set a default value
  qdev-properties.h: Explicitly set the default value for arraylen properties

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 10:35:06 +01:00
Jason J. Herne
cc28a5949b s390x/cpumodel: z14 cpu models
This patch introduces the CPU model for z14, along with all base and
optional features.

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-18 10:55:16 +02:00
Christian Borntraeger
ab6e15500b linux header sync against v4.13-rc1
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-18 10:55:16 +02:00
Peter Maydell
a778cd5610 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Mon 17 Jul 2017 13:17:17 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  virtio-net: fix offload ctrl endian
  virtion-net: Prefer is_power_of_2()
  docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
  net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
  net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
  net/colo.c: Add vnet packet parse feature in colo-proxy
  net/colo-compare.c: Make colo-compare support vnet_hdr_len
  net/colo-compare.c: Introduce parameter for compare_chr_send()
  net/colo.c: Make vnet_hdr_len as packet property
  net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
  net/filter-mirror.c: Make filter mirror support vnet support.
  net/filter-mirror.c: Introduce parameter for filter_send()
  net/net.c: Add vnet_hdr support in SocketReadState
  net: Add vnet_hdr_len arguments in NetClientState

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-18 09:16:43 +01:00
Kashyap Chamarthy
8508eee740 live-block-ops.txt: Rename, rewrite, and improve it
This patch documents (including their QMP invocations) all the four
major kinds of live block operations:

  - `block-stream`
  - `block-commit`
  - `drive-mirror` (& `blockdev-mirror`)
  - `drive-backup` (& `blockdev-backup`)

Things considered while writing this document:

  - Use reStructuredText as markup language (with the goal of generating
    the HTML output using the Sphinx Documentation Generator).  It is
    gentler on the eye, and can be trivially converted to different
    formats.  (Another reason: upstream QEMU is considering switching to
    Sphinx, which uses reStructuredText as its markup language.)

  - Raw QMP JSON output vs. 'qmp-shell'.  I debated with myself whether
    to only show raw QMP JSON output (as that is the canonical
    representation), or use 'qmp-shell', which takes key-value pairs.  I
    settled on the approach of: for the first occurrence of a command,
    use raw JSON; for subsequent occurrences, use 'qmp-shell', with an
    occasional exception.

  - Usage of `-blockdev` command-line.

  - Usage of 'node-name' vs. file path to refer to disks.  While we have
    `blockdev-{mirror, backup}` as 'node-name'-alternatives for
    `drive-{mirror, backup}`, the `block-commit` command still operates
    on file names for parameters 'base' and 'top'.  So I added a caveat
    at the beginning to that effect.

    Refer to this related thread that I started (where I learnt
    `block-stream` was recently reworked to accept 'node-name' for 'top'
    and 'base' parameters):
    https://lists.nongnu.org/archive/html/qemu-devel/2017-05/msg06466.html
    "[RFC] Making 'block-stream', and 'block-commit' accept node-name"

All commands shown in this document were tested while documenting.

Thanks: Eric Blake for the section: "A note on points-in-time vs file
names".  This useful bit was originally articulated by Eric in his
KVMForum 2015 presentation, so I included that specific bit in this
document.

Signed-off-by: Kashyap Chamarthy <kchamart@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170717105205.32639-3-kchamart@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-07-18 00:11:01 -04:00
Kashyap Chamarthy
7746cf8aab bitmaps.md: Convert to rST; move it into 'interop' dir
This is part of the on-going effort to convert QEMU upstream
documentation syntax to reStructuredText (rST).

The conversion to rST was done using:

    $ pandoc -f markdown -t rst bitmaps.md -o bitmaps.rst

Then, make a couple of small syntactical adjustments.  While at it,
reword a statement to avoid ambiguity.  Addressing the feedback from
this thread:

    https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg05428.html

Signed-off-by: Kashyap Chamarthy <kchamart@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170717105205.32639-2-kchamart@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-07-18 00:11:01 -04:00
Marc-André Lureau
cbcd9ba1b7 qga: report error on keyfile dump error
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc:qemu-trivial@nongnu.org
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-17 19:22:14 -05:00
Peng Hao
f8837b3765 qga-win32: remove a redundancy code
The first line of run_agent already sets ga_state = s, so there is no
need to set ga_state = s again later.

Signed-off-by: Peng Hao <peng.hao2@zte.com.cn>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-17 19:12:16 -05:00
Tomáš Golembiovský
e674605f98 qemu-ga: check if utmpx.h is available on the system
Commit 161a56a906 added command guest-get-users and requires the
utmpx.h (defined by POSIX) to work. It is however not always available
(e.g. on OpenBSD), therefore a check for its existence is necessary.

Signed-off-by: Tomáš Golembiovský <tgolembi@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-17 19:10:09 -05:00
Thomas Lamprecht
f2dce827f5 qemu-ga: add missing libpcre to MSI build
glib depends on libpcre which was not shipped with the MSI, thus
starting of the qemu-ga.exe failed with the respective error message.

Tell WIXL to ship this library with the MSI to avoid this problem.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
CC: Stefan Weil <sw@weilnetz.de>
CC: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-17 18:58:37 -05:00
Daniel Rempel
009f38d985 qga-win: fix installation on localized windows
Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1357789
Replace hardcoded user and group names ("Administrators", "SYSTEM") with the ones acquired from system. Windows uses localized strings for these names and it may cause the installation to fail.
Windows has Well-known SIDs for "Administrators" group and "SYSTEM" user so they were used to identify required users and groups.
Well-known SIDs: https://support.microsoft.com/en-us/help/243330/well-known-security-identifiers-in-windows-operating-systems

Signed-off-by: Daniel Rempel <daniel@daynix.com>
Signed-off-by: Sameeh Jubran <sjubran@redhat.com>
Reviewed-by: Sameeh Jubran <sameeh@daynix.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-07-17 18:51:14 -05:00
Eric Blake
5f66d060db nbd: Fix server reply to NBD_OPT_EXPORT_NAME of older clients
A typo in commit 23e099c set the size of buf[] used in response
to NBD_OPT_EXPORT_NAME according to the length needed for old-style
negotiation (4 bytes of flag information) instead of the intended
2 bytes used in new style.  If the client doesn't enable
NBD_FLAG_C_NO_ZEROES, then the server sends two bytes too many,
and is then out of sync in response to the client's next command
(the bug is masked when modern qemu is the client, since we enable
the no zeroes flag).

While touching this code, add some more defines to nbd_internal.h
rather than having quite so many magic numbers in the .c; also,
use "" initialization rather than memset(), and tweak the oldstyle
negotiation to better match the spec description of the layout
(since the spec is big-endian, skipping two bytes as 0 followed by
writing a 2-byte flag is the same as writing a zero-extended 4-byte
flag), to make it a bit easier to follow compared to the spec.

[checkpatch.pl has some false positives in the comments]

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170717192635.17880-3-eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-17 17:06:46 -05:00
Eric Blake
48000eb3ec nbd: Trace client command being sent
Make the client trace slightly more legible by including the name
of the command being sent.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-Id: <20170717192635.17880-2-eblake@redhat.com>
2017-07-17 17:06:30 -05:00
Richard Henderson
dbdaaff43a target/s390x: Fix risbg handling
The rotation is to the left, but extract shifts to the right.
The computation of the extract parameters needs adjusting.

For the entry condition, simplify

	64 - rot + len <= 64
	-rot + len <= 0
	len <= rot

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reported-by: David Hildenbrand <david@redhat.com>
Suggested-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:17 -07:00
David Hildenbrand
19d70587b5 target/s390x: Allow to enable "idtes" feature for TCG
STFL bit 4 and 5 are just indications to the guest, which TLB entries an
IDTE call will clear. These are performance indicators for the guest.

STFL bit 4:
    INVALIDATE DAT TABLE ENTRY (IDTE) performs
    the invalidation-and-clearing operation by
    selectively clearing TLB segment-table entries
    when a segment-table entry or entries are
    invalidated. IDTE also performs the clearing-by-
    ASCE operation. Unless bit 4 is one, IDTE simply
    purges all TLBs. Bit 3 is one if bit 4 is one.

We can simply set STFL bit 4 ("idtes") and still purge the complete TLB.
Purging more than advertised is never bad. E.g. Linux doesn't even care
about this bit. We can optimize this later.
This is helpful, as the z9 base model contains this facility.

STFL bit 5 (clearing TLB region-table-entries) was never implemented on
real HW, therefore we can simply ignore it for now.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170627161032.5014-1-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:17 -07:00
Richard Henderson
e89ea6aa9d target/s390x: Mark ETF3 and ETF3_ENH facilities as available
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:17 -07:00
Richard Henderson
b213c9f58e target/s390x: Implement TRTR
Drop TRT from the set of insns handled internally by EXECUTE.
It's more important to adjust the existing helper to handle
both TRT and TRTR.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:17 -07:00
Richard Henderson
be7acb5839 target/s390x: Implement SRSTU
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:17 -07:00
Richard Henderson
7591db780d target/s390x: Tidy SRST
Since we require all registers saved on input, read R0 from ENV instead
of passing it manually.  Recognize the specification exception when R0
contains incorrect data.  Keep high bits of result registers unmodified
when in 31 or 24-bit mode.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:17 -07:00
Richard Henderson
941ef3db03 target/s390x: Implement CONVERT UNICODE insns
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:16 -07:00
Richard Henderson
c67ba30307 target/s390x: Implement CSST
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-17 14:13:16 -07:00
Eric Blake
9a76bd783d nbd: Fix iotests failure due to changed client error message
Commit 8ecaeae8 changed the way the client requests an NBD export,
and in the process also changed the resulting error message when
the export is not present, breaking a couple of iotests.  The error
message is now directly given by the server (a failed NBD_OPT_GO)
instead of implied by the client (after exhausting NBD_OPT_LIST),
but looking at the testsuite changes, it proves worthwhile to
reword the error message to be slightly less verbose (as this is
one particular error message likely to be hit by a user).

Note that the error message is now sensitive to which binary is
running the server as well as the client (since the expected
output is replaying a message received from the server - for that
matter, it depends on a server new enough to understand NBD_OPT_GO);
in general iotests are run on client and server from the same source
code base so the default setup will pass; but if it proves
problematic for people overriding QEMU_PROG, QEMU_IMG_PROG,
QEMU_IO_PROG, and QEMU_NBD_PROG to point across multiple builds for
cross-version integration testing, we may have to later tweak or
sanitize the output somehow.

Reported-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170717142310.17048-1-eblake@redhat.com>
Tested-by: John Snow <jsnow@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-07-17 13:57:42 -05:00
Eduardo Habkost
f86285c571 qmp: Include parent type on 'qom-list-types' output
Include name of parent type of each type on 'qom-list-types' output.

Without this, there's no way to figure out the parents of a given type
without making additional 'qom-list-types' queries.

In addition to the test case for the new feature, update the
abstract-interface test case to use the new field and avoid the
"qom-list-types implements=object" trick.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170707122215.8819-4-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Eduardo Habkost
87467eae37 qmp: Include 'abstract' field on 'qom-list-types' output
A client may be interested in getting the list of both abstract and
non-abstract types.  Instead of requiring them to make multiple queries
with different filter arguments, just return an 'abstract' field in
'qom-list-types'.

In addition to the new test code for validating this field, update the
abstract-interfaces test case to query for all 'interface' subtypes
(including abstract ones), and to look at the 'abstract' field directly.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170707122215.8819-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Eduardo Habkost
dbb2a604a9 tests: Simplify abstract-interfaces check with a helper
Add a new type_list_find() helper to device-introspect-test.c, to
simplify the code at test_abstract_interfaces().

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170707122215.8819-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Boqun Feng (Intel)
53f9a6f45f i386: add Skylake-Server cpu model
Introduce Skylake-Server cpu model which inherits the features from
Skylake-Client and supports some additional features that are: AVX512,
CLWB and PGPE1GB.

Signed-off-by: Boqun Feng (Intel) <boqun.feng@gmail.com>
Message-Id: <20170621052935.20715-1-boqun.feng@gmail.com>
[ehabkost: copied comment about XSAVES from Skylake-Client]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Eduardo Habkost
cf70879f14 i386: Update comment about XSAVES on Skylake-Client
Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Daniel P. Berrange
1ce36bfe64 i386: expose "TCGTCGTCGTCG" in the 0x40000000 CPUID leaf
Currently when running KVM, we expose "KVMKVMKVM\0\0\0" in
the 0x40000000 CPUID leaf. Other hypervisors (VMWare,
HyperV, Xen, BHyve) all do the same thing, which leaves
TCG as the odd one out.

The CPUID signature is used by software to detect which
virtual environment they are running in and (potentially)
change behaviour in certain ways. For example, systemd
supports a ConditionVirtualization= setting in unit files.
The virt-what command can also report the virt type it is
running on.

Currently both these apps have to resort to custom hacks
like looking for 'fw-cfg' entry in the /proc/device-tree
file to identify TCG.

This change thus proposes a signature "TCGTCGTCGTCG" to be
reported when running under TCG.

To hide this, the -cpu option tcg-cpuid=off can be used.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <20170509132736.10071-3-berrange@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Mark Cave-Ayland
39736e18cd fw_cfg: move QOM type defines and fw_cfg types into fw_cfg.h
By exposing FWCfgIoState and FWCfgMemState internals we allow the possibility
for the internal MemoryRegion fields to be mapped by name for boards that wish
to wire up the fw_cfg device themselves.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-Id: <1500025208-14827-4-git-send-email-mark.cave-ayland@ilande.co.uk>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Mark Cave-Ayland
38f3adc34d fw_cfg: move qdev_init_nofail() from fw_cfg_init1() to callers
When looking to instantiate a TYPE_FW_CFG_MEM or TYPE_FW_CFG_IO device to be
able to wire it up differently, it is much more convenient for the caller to
instantiate the device and have the fw_cfg default files already preloaded
during realize.

Move fw_cfg_init1() to the end of both the fw_cfg_mem_realize() and
fw_cfg_io_realize() functions so it no longer needs to be called manually
when instantiating the device, and also rename it to fw_cfg_common_realize()
which better describes its new purpose.

Since it is now the responsibility of the machine to wire up the fw_cfg device
it is necessary to introduce a object_property_add_child() call into
fw_cfg_init_io() and fw_cfg_init_mem() to link the fw_cfg device to the root
machine object as before.

Finally with the previous change to fw_cfg_find() we can now remove the
assert() preventing multiple fw_cfg devices being instantiated and replace
them with a simple call to fw_cfg_find() at realize time instead. This allows
us to remove FW_CFG_NAME and FW_CFG_PATH since they are no longer required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1500025208-14827-3-git-send-email-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Mark Cave-Ayland
6e99c075a0 fw_cfg: switch fw_cfg_find() to locate the fw_cfg device by type rather than path
This will enable the fw_cfg device to be placed anywhere within the QOM tree
regardless of its machine location.

Note that we also add a comment to document the behaviour that we return NULL to
indicate failure where either no fw_cfg device or multiple fw_cfg devices are
found.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-Id: <1500025208-14827-2-git-send-email-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Eduardo Habkost
ebcc479eee qom: Fix ambiguous path detection when ambiguous=NULL
object_resolve_path*() ambiguous path detection breaks when
ambiguous==NULL and the object tree has 3 objects of the same type and
only 2 of them are under the same parent.  e.g.:

 /container/obj1 (TYPE_FOO)
 /container/obj2 (TYPE_FOO)
 /obj2 (TYPE_FOO)

With the above tree, object_resolve_path_type("", TYPE_FOO, NULL) will
incorrectly return /obj2, because the search inside "/container" will
return NULL, and the match at "/obj2" won't be detected as ambiguous.

Fix that by always calling object_resolve_partial_path() with a non-NULL
ambiguous parameter.

Test case included.

Reported-by: Igor Mammedov <imammedo@redhat.com>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170707213052.13087-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Eduardo Habkost
6d1e30c4ac Revert "machine: Convert abstract typename on compat_props to subclass names"
This reverts commit 0bcba41fe3.

The bug addressed by that commit is now fixed in a better way by the
commit "qdev: fix the order compat and global properties are applied".

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170711004303.3902-4-ehabkost@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Eduardo Habkost
3caca55558 test-qdev-global-props: Test global property ordering
Test case to detect the bug fixed by commit
"qdev: fix the order compat and global properties are applied".

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170711004303.3902-3-ehabkost@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:30 -03:00
Greg Kurz
5eb6a3c501 qdev: fix the order compat and global properties are applied
The current code recursively applies global properties from child up to
parent types. This can cause properties passed with the -global option to
be silently overridden by internal compat properties.

This is exactly what happened with virtio-*-pci drivers since commit:

"9a4c0e220d8a hw/virtio-pci: fix virtio behaviour"

Passing -device virtio-blk-pci.disable-modern=off had no effect on 2.6
machine types because the internal virtio-pci.disable-modern=on compat
property always prevailed.

A workaround for this was included with commit 0bcba41f ("machine:
Convert abstract typename on compat_props to subclass names").

This patch fixes the issue properly by reversing the logic: we now go
through the global property list and, for each property, we check if it
is applicable to the device.

This results in compat properties being applied first, in the order they
appear in the HW_COMPAT_* macros, followed by global properties, in the
order they appear on the command line.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <148103887228.22326.478406873609299999.stgit@bahia.lab.toulouse-stg.fr.ibm.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170711004303.3902-2-ehabkost@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:29 -03:00
Eduardo Habkost
3f0058bbc1 tests: Test case for object_resolve_path*()
Test for partial path lookup using object_resolve_path*().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170707213052.13087-2-ehabkost@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-07-17 15:41:29 -03:00
Eduardo Habkost
2a6f395b9a device-crash-test: Fix regexp on whitelist
The "||" in the whitelist entry was not escaped, making the regexp match
all strings, on every single case where QEMU aborted.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170614144939.1115-1-ehabkost@redhat.com>
2017-07-17 15:41:29 -03:00
Alexey Kardashevskiy
8c37faa475 vfio-pci, ppc64/spapr: Reorder group-to-container attaching
At the moment VFIO PCI device initialization works as follows:
vfio_realize
	vfio_get_group
		vfio_connect_container
			register memory listeners (1)
			update QEMU groups lists
		vfio_kvm_device_add_group

Then (example for pseries) the machine reset hook triggers region_add()
for all regions where listeners from (1) are listening:

ppc_spapr_reset
	spapr_phb_reset
		spapr_tce_table_enable
			memory_region_add_subregion
				vfio_listener_region_add
					vfio_spapr_create_window

This scheme works fine until we need to handle VFIO PCI device hotplug
and we want to enable PPC64/sPAPR in-kernel TCE acceleration,
i.e. after PCI hotplug we need a place to call
ioctl(vfio_kvm_device_fd, KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE).
Since the ioctl needs a LIOBN fd (from sPAPRTCETable) and an IOMMU group fd
(from VFIOGroup), vfio_listener_region_add() seems to be the only place
for this ioctl().

However this only works during boot time because the machine reset
happens strictly after all devices are finalized. When hotplug happens,
vfio_listener_region_add() is called when a memory listener is registered
but when this happens:
1. new group is not added to the container->group_list yet;
2. VFIO KVM device is unaware of the new IOMMU group.

This moves bits around to have all necessary VFIO infrastructure
in place for both initial startup and hotplug cases.

[aw: ie, register vfio groups with kvm prior to memory listener
registration such that kvm-vfio pseudo device ioctls are available
during the region_add callback]

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-07-17 12:39:09 -06:00
Peter Maydell
5a477a7806 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Mon 17 Jul 2017 13:11:17 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace: update old trace events in docs
  trace: [trivial] Statically enable all guest events
  trace: [tcg, trivial] Re-align generated code
  trace: [tcg] Do not generate TCG code to trace dynamically-disabled events
  exec: [tcg] Use different TBs according to the vCPU's dynamic tracing state
  trace: [tcg] Delay changes to dynamic state when translating
  trace: Allocate cpu->trace_dstate in place

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 18:39:32 +01:00
Peter Maydell
ca4e667dbf Merge remote-tracking branch 'remotes/kraxel/tags/usb-20170717-pull-request' into staging
ehci fix for 2.10

# gpg: Signature made Mon 17 Jul 2017 10:40:00 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/usb-20170717-pull-request:
  ehci: add sanity check for maxframes

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 17:54:17 +01:00
Peter Maydell
df55c7e8c8 Merge remote-tracking branch 'remotes/kraxel/tags/vga-20170717-pull-request' into staging
virtio-gpu migration fix for 2.10

# gpg: Signature made Mon 17 Jul 2017 10:41:49 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/vga-20170717-pull-request:
  virtio-gpu: skip update cursor in post_load if we don't have one

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 17:12:41 +01:00
Peter Maydell
3408d5aee0 Merge remote-tracking branch 'remotes/kraxel/tags/ui-20170717-pull-request' into staging
ui: vnc and keymap updates for 2.10

# gpg: Signature made Mon 17 Jul 2017 10:38:11 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/ui-20170717-pull-request:
  keymaps: fr-ca: add missing keys
  hmp: Update info vnc
  vnc: Set default kbd delay to 10ms

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 16:31:31 +01:00
Denis V. Lunev
593ed6f0a3 block: fix shadowed variable in bdrv_co_pdiscard
We've had a shadowed 'ret' variable, which risks returning the wrong
value, introduced in commit b9c64947.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170710150559.30163-1-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 15:58:37 +01:00
Alistair Francis
55d41b16ee util/aio-win32: Only select on what we are actually waiting for
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Acked-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 9307b70e9876c4e9e3c4478524a32a23a3d5dd05.1499368180.git.alistair.francis@xilinx.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 15:58:37 +01:00
Aurelien Jarno
06a57e5cc7 target/mips: optimize WSBH, DSBH and DSHD
Use the same mask to avoid having to load two different constants.

Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-17 16:48:21 +02:00
Pavel Dovgalyuk
c6c2c0fc32 mips: set CP0 Debug DExcCode for SDBBP instruction
This patch fixes setting DExcCode field of CP0 Debug register
when SDBBP instruction is executed. According to EJTAG specification,
this field must be set to the value 9 (Bp).

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170502120350.3368.92338.stgit@PASHA-ISP
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-07-17 16:48:21 +02:00
Peter Maydell
63dba7e6b1 Merge remote-tracking branch 'remotes/kraxel/tags/audio-20170717-pull-request' into staging
audio: fixes for 2.10

# gpg: Signature made Mon 17 Jul 2017 10:29:08 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/audio-20170717-pull-request:
  audio/adlib: remove limitation of one adlib card
  audio/fmopl: modify timer callback to give opaque and channel parameters in two arguments
  audio: st_rate_flow exist a infinite loop

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 15:43:15 +01:00
Peter Maydell
1f244ebbba Merge remote-tracking branch 'remotes/kraxel/tags/ipxe-pull-request' into staging
Pull request

# gpg: Signature made Mon 17 Jul 2017 10:04:11 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/ipxe-pull-request:
  ipxe: update to commit 0600d3ae94

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 15:05:29 +01:00
Alexander Graf
73663d71ef PPC: E500: Update u-boot to v2017.07
Quite a while has passed since we last updated U-Boot for e500. This patch
bumps it to the last released version 2017.07 to make sure users don't feel
like they're using out of date software.

Signed-off-by: Alexander Graf <agraf@suse.de>
Tested-by: Thomas Huth <thuth@redhat.com>
Message-id: 1499862868-102130-1-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 13:50:47 +01:00
Peter Maydell
e5a6a6e64e MAINTAINERS: Add entries for MPS2 board
Add entries to the MAINTAINERS file for the new MPS2
board and devices.

Since the CMSDK devices are not specific to the MPS2 board,
extend the existing 'PrimeCell' section to cover CMSDK
devices as well; in both cases these are devices implemented
by ARM and provided as RTL that may be used in multiple
SoCs and boards.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1500029487-14822-10-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-07-17 13:36:09 +01:00
Peter Maydell
3587393922 hw/arm/mps2: Add ethernet
The MPS2 FPGA images support ethernet via a LAN9220. We use
QEMU's LAN9118 model, which is software compatible except
that it is missing the checksum-offload feature.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1500029487-14822-9-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-07-17 13:36:09 +01:00
Peter Maydell
6dbdf4ec33 hw/arm/mps2: Add SCC
Add the SCC to the MPS2 board models.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1500029487-14822-8-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:08 +01:00
Peter Maydell
dd73185b7d hw/misc/mps2_scc: Implement MPS2 Serial Communication Controller
Implement a model of the Serial Communication Controller (SCC) found
in MPS2 FPGA images.

The primary purpose of this device is to communicate with the
Motherboard Configuration Controller (MCC) which is located on
the MPS board itself, outside the FPGA image. This is used
for programming the MPS clock generators. The SCC also has
some basic ID registers and an output for the board LEDs.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1500029487-14822-7-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:08 +01:00
Peter Maydell
3d53904a68 hw/arm/mps2: Add timers
Add the CMSDK APB timers to the MPS2 board.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1500029487-14822-6-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:08 +01:00
Peter Maydell
5dd85b4b48 hw/char/cmsdk-apb-timer: Implement CMSDK APB timer device
Implement a model of the simple timer device found in the CMSDK.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1500029487-14822-5-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:08 +01:00
Peter Maydell
977a15f480 hw/arm/mps2: Add UARTs
Add the UARTs to the MPS2 board models.

Unfortunately the details of the wiring of the interrupts through
various OR gates differ between AN511 and AN385 so this can't
be purely a data-driven difference.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 1500029487-14822-4-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:08 +01:00
Peter Maydell
775df84e48 hw/char/cmsdk-apb-uart.c: Implement CMSDK APB UART
Implement a model of the simple "APB UART" provided in
the Cortex-M System Design Kit (CMSDK).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1500029487-14822-3-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-07-17 13:36:08 +01:00
Peter Maydell
2eb5578b47 hw/arm/mps2: Implement skeleton mps2-an385 and mps2-an511 board models
Model the ARM MPS2/MPS2+ FPGA based development board.

The MPS2 and MPS2+ dev boards are FPGA based (the 2+ has a bigger
FPGA but is otherwise the same as the 2). Since the CPU itself
and most of the devices are in the FPGA, the details of the board
as seen by the guest depend significantly on the FPGA image.

We model the following FPGA images:
 "mps2_an385" -- Cortex-M3 as documented in ARM Application Note AN385
 "mps2_an511" -- Cortex-M3 'DesignStart' as documented in AN511

They are fairly similar but differ in the details for some
peripherals.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1500029487-14822-2-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-07-17 13:36:08 +01:00
Alex Bennée
b29fd33db5 target/arm: use DISAS_EXIT for eret handling
Previously DISAS_JUMP did ensure this but with the optimisation of
8a6b28c7 (optimize indirect branches) we might not leave the loop.
This means if any pending interrupts are cleared by changing IRQ flags
we might never get around to servicing them. You usually notice this
by seeing the lookup_tb_ptr() helper gainfully chaining TBs together
while cpu->interrupt_request remains high and the exit_request has not
been set.

This breaks amongst other things the OPTEE test suite which executes
an eret from the secure world after a non-secure world IRQ has gone
pending which then never gets serviced.

Instead of using the previously implied semantics of DISAS_JUMP we use
DISAS_EXIT which will always exit the run-loop.

CC: Etienne Carriere <etienne.carriere@linaro.org>
CC: Joakim Bech <joakim.bech@linaro.org>
CC: Jaroslaw Pelczar <j.pelczar@samsung.com>
CC: Peter Maydell <peter.maydell@linaro.org>
CC: Emilio G. Cota <cota@braap.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 20170713141928.25419-7-alex.bennee@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 13:36:07 +01:00
Alex Bennée
0b609cc128 target/arm: use gen_goto_tb for ISB handling
While an ISB will ensure any raised IRQs happen on the next
instruction it doesn't cause any to get raised by itself. We can
therefore use a simple tb exit for ISB instructions and rely on the
exit_request check at the top of each TB to deal with exiting if
needed.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 20170713141928.25419-6-alex.bennee@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 13:36:07 +01:00
Alex Bennée
4cae8f56fb target/arm/translate: ensure gen_goto_tb sets exit flags
As the gen_goto_tb function can do both static and dynamic jumps it
should also set the is_jmp field. This matches the behaviour of the
a64 code.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 20170713141928.25419-5-alex.bennee@linaro.org
[tweak to multiline comment formatting]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 13:36:07 +01:00
Alex Bennée
abd1fb0ee2 target/arm/translate.h: expand comment on DISAS_EXIT
We already have an exit condition, DISAS_UPDATE which will exit the
run-loop. Expand on the difference with DISAS_EXIT in the comments.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 20170713141928.25419-4-alex.bennee@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 13:36:07 +01:00
Alex Bennée
e8d5230221 target/arm/translate: make DISAS_UPDATE match declared semantics
DISAS_UPDATE should be used when the wider CPU state other than just
the PC has been updated and we should therefore exit the TCG runtime
and return to the main execution loop rather than assuming DISAS_JUMP would
do that.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 20170713141928.25419-3-alex.bennee@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 13:36:07 +01:00
Alex Bennée
df0311e634 include/exec/exec-all: document common exit conditions
As a precursor to later patches attempt to come up with a more
concrete wording for what each of the common exit cases would be.

CC: Emilio G. Cota <cota@braap.org>
CC: Richard Henderson <rth@twiddle.net>
CC: Lluís Vilanova <vilanova@ac.upc.edu>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 20170713141928.25419-2-alex.bennee@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 13:36:07 +01:00
Peter Maydell
8d92e26b45 target/arm: Make Cortex-M3 and M4 default to 8 PMSA regions
The Cortex-M3 and M4 CPUs always have 8 PMSA MPU regions (this isn't
a configurable option for the hardware).  Make the default value of
the pmsav7-dregion property be set per-cpu, so we don't need to have
every user of these CPUs set it manually.  (The existing default of
16 is correct for the other PMSAv7 core, the Cortex-R5.)

This fixes a bug where we were creating the M3 and M4 with
too many regions; most guest software would not notice or
care, though, since it would just not use the registers
associated with the unexpected extra regions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1499788408-10096-4-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:07 +01:00
Peter Maydell
5cc56cc687 qdev: support properties which don't set a default value
In some situations it's useful to have a qdev property which doesn't
automatically set its default value when qdev_property_add_static is
called (for instance when the default value is not constant).

Support this by adding a flag to the Property struct indicating
whether to set the default value.  This replaces the existing test
for whether the PropertyInfo set_default_value function pointer is
NULL, and we set the .set_default field to true for all those cases
of struct Property which use a PropertyInfo with a non-NULL
set_default_value, so behaviour remains the same as before.

This gives us the semantics of:
 * if .set_default is true, then .info->set_default_value must
   be not NULL, and .defval is used as the default value of
   the property
 * otherwise, the property system does not set any default, and
   the field will retain whatever initial value it was given by
   the device's .instance_init method

We define two new macros DEFINE_PROP_SIGNED_NODEFAULT and
DEFINE_PROP_UNSIGNED_NODEFAULT, to cover the most plausible use cases
of wanting to set an integer property with no default value.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1499788408-10096-3-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:06 +01:00
Peter Maydell
d9a7b125d6 qdev-properties.h: Explicitly set the default value for arraylen properties
In DEFINE_PROP_ARRAY, because we use a PropertyInfo (qdev_prop_arraylen)
which has a .set_default_value member we will set the field to a default
value. That default value will be zero, by the C rule that struct
initialization sets unmentioned members to zero if at least one member
is initialized. However it's clearer to state it explicitly.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1499788408-10096-2-git-send-email-peter.maydell@linaro.org
2017-07-17 13:36:06 +01:00
Jason Wang
189ae6bb5c virtio-net: fix offload ctrl endian
The spec says offloads should be le64, so use virtio_ldq_p() to guarantee
valid endianness.

Fixes: 644c98587d ("virtio-net: dynamic network offloads configuration")
Cc: qemu-stable@nongnu.org
Cc: Dmitry Fleytman <dfleytma@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:56 +08:00
Michal Privoznik
5f997fd17b virtion-net: Prefer is_power_of_2()
We have a function that checks if given number is power of two.
We should prefer it instead of expanding the check on our own.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:55 +08:00
Zhang Chen
2484ff0624 docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:54 +08:00
Zhang Chen
4b39bdced5 net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
We add the vnet_hdr_support option for filter-rewriter, default is disabled.
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
You can use it for example:
-object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support

We get the vnet_hdr_len from NetClientState, which lets us
parse net packets correctly.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:53 +08:00
Zhang Chen
d63b366a26 net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
COLO-Proxy just focuses on the packet payload, so we skip the vnet header.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:52 +08:00
Zhang Chen
5cc444d367 net/colo.c: Add vnet packet parse feature in colo-proxy
Make colo-compare and filter-rewriter able to parse vnet packets.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:51 +08:00
Zhang Chen
aa3a7032f7 net/colo-compare.c: Make colo-compare support vnet_hdr_len
We add the vnet_hdr_support option for colo-compare, default is disabled.
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
You can use it for example:
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support

COLO-compare can get the vnet header length from the filter.
Add vnet_hdr_len to struct packet and output packets with
the vnet_hdr_len.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:50 +08:00
Zhang Chen
3037e7a5b7 net/colo-compare.c: Introduce parameter for compare_chr_send()
This patch changes the compare_chr_send() parameter from CharBackend to CompareState,
so we can get more information like vnet_hdr (we use it to support packets with vnet_header).

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:49 +08:00
Zhang Chen
ada1a33f9a net/colo.c: Make vnet_hdr_len as packet property
We can use this property to flush and send packets with vnet_hdr_len.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:48 +08:00
Zhang Chen
00d5c2406b net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
We add the vnet_hdr_support option for filter-redirector, default is disabled.
If you use the virtio-net-pci net driver or another driver that needs vnet_hdr, please enable it.
Because colo-compare or other modules need the vnet_hdr_len to parse
packets, we add this new option to send the length to others.
You can use it for example:
-object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:47 +08:00
Zhang Chen
e2521f0e03 net/filter-mirror.c: Make filter mirror support vnet support.
We add the vnet_hdr_support option for filter-mirror, default is disabled.
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
You can use it for example:
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support

If the vnet_hdr_support flag is set, we change the sending packet format from
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];},
so that other modules (like colo-compare) know how to parse net packets correctly.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:13:45 +08:00
Stefan Hajnoczi
304187c51c trace: update old trace events in docs
Commit c5f1ad429c ("block: Remove
bdrv_aio_readv/writev/flush()") removed
bdrv_aio_readv()/bdrv_aio_writev() so the example in the tracing
documentation is no longer valid.

Reported-by: Wang Dong <dongdwdw@linux.vnet.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170714133111.27359-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 13:11:13 +01:00
Lluís Vilanova
5caa262fda trace: [trivial] Statically enable all guest events
The existing optimizations make it feasible to have them available on all
builds.

Some quick'n'dirty numbers with 400.perlbench (SPECcpu2006) on the train input
(medium size - suns.pl) and the guest_mem_before event:

* vanilla, statically disabled
real    0m2,259s
user    0m2,252s
sys     0m0,004s

* vanilla, statically enabled (overhead: 2.18x)
real    0m4,921s
user    0m4,912s
sys     0m0,008s

* multi-tb, statically disabled (overhead: 0.99x) [within noise range]
real    0m2,228s
user    0m2,216s
sys     0m0,008s

* multi-tb, statically enabled (overhead: 0.99x) [within noise range]
real    0m2,229s
user    0m2,224s
sys     0m0,004s

Now enabling all events when booting an ARM system that immediately shuts down
(https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg04085.html):

* vanilla, statically disabled
real	0m32,153s
user	0m31,276s
sys	0m0,108s

* vanilla, statically enabled (overhead: 1.35x)
real	0m43,507s
user	0m42,680s
sys	0m0,168s

* multi-tb, statically disabled (overhead: 1.03x)
real	0m32,993s
user	0m32,516s
sys	0m0,104s

* multi-tb, statically enabled (overhead: 1.00x) [within noise range]
real	0m32,110s
user	0m31,176s
sys	0m0,156s

And finally enabling all events using Emilio's dbt-bench
(where orig == vanilla, new == multi-tb):

                                                        NBench score; higher is better

  180 +-+--------+----------+----------+---------+----------+----------+----------+----------+----------+---------+----------+--------+-+
      |                                                                                                                                 |
      |                                      *** $$$$%%                                                                    orig         |
  160 +-+....................................*.*.$..$.%............................................................orig-enabled       +-+
      |                                      * * $  $ %                                                                     new         |
  140 +-+....................................*.*.$..$.%............................................................new-disabled.......+-+
      |                                      * * $  $ %                                                                                 |
      |                                      * * $  $ %                                                                                 |
  120 +-+....................................*.*.$..$.%...............................................................................+-+
      |                                      * * $  $ %                                                                                 |
      |                                      * * $  $ %                                                                                 |
  100 +-+....................................*.*.$..$.%.....$$$%%%....................................................................+-+
      |                                      * * $  $ % *** $ $  % *** $$$%%                                                            |
   80 +-+....................................*.*.$..$.%.*.*.$.$..%.*.*.$.$.%..........................................................+-+
      |                                      * * $  $ % * * $ $  % * * $ $ %                                                            |
      |                                      * * $  $ % * * $ $  % * * $ $ %                                                            |
   60 +-+.........................***..$$$%%.*.*##..$.%.*.*.$.$..%.*.*.$.$.%..***.$$$%%...............................................+-+
      |                **** $$$%% * *  $ $ % * * #  $ % * *## $  % * * $ $ %  * * $ $ %                                                 |
      |                *  * $ $ % * *  $ $ % * * #  $ % * * # $  % * *## $ %  * * $ $ %                                                 |
   40 +-+..............*..*.$.$.%.*.*..$.$.%.*.*.#..$.%.*.*.#.$..%.*.*.#.$.%..*.*.$.$.%...............................................+-+
      |                *  * $ $ % * *  $ $ % * * #  $ % * * # $  % * * # $ %  * *## $ %                                  *** $$$%%%     |
   20 +-+....***.$$$%%.*..*##.$.%.*.*###.$.%.*.*.#..$.%.*.*.#.$..%.*.*.#.$.%..*.*.#.$.%..................................*.*.$.$..%...+-+
      |      * *## $ % *  * # $ % * *  # $ % * * #  $ % * * # $  % * * # $ %  * * # $ %                                  * *## $  %     |
      |      * * # $ % *  * # $ % * *  # $ % * * #  $ % * * # $  % * * # $ %  * * # $ %            ***###$$%% ***##$$$%% * * # $  %     |
    0 +-+----***##$$%%-****##$$%%-***###$$%%-***##$$$%%-***##$$%%%-***##$$%%--***##$$%%-****##$$%%-***###$$%%-***##$$$%%-***##$$%%%---+-+
     NUMERIC SORTSTRING SORT   BITFIEFP EMULATION ASSIGNMENT       IDEA    HUFFMAN    FOURIER NEURLU DECOMPOSITION      gmean
png: http://imgur.com/a/8XG5S

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-id: 149915849243.6295.4484103824675839071.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 13:11:13 +01:00
Lluís Vilanova
1ff7b53196 trace: [tcg, trivial] Re-align generated code
Last patch removed a nesting level in generated code. Re-align all code
generated by backends to be 4-column aligned.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-id: 149915824586.6295.17820926011082409033.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 13:11:13 +01:00
Lluís Vilanova
864a2178d4 trace: [tcg] Do not generate TCG code to trace dynamically-disabled events
If an event is dynamically disabled, the TCG code that calls the
execution-time tracer is not generated.

Removes the overheads of execution-time tracers for dynamically disabled
events. As a bonus, also avoids checking the event state when the
execution-time tracer is called from TCG-generated code (since otherwise
TCG would simply not call it).

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-id: 149915799921.6295.13067154430923434035.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 13:11:12 +01:00
Lluís Vilanova
61a67f71dd exec: [tcg] Use different TBs according to the vCPU's dynamic tracing state
Every vCPU now uses a separate set of TBs for each set of dynamic
tracing event state values. Each set of TBs can be used by any number of
vCPUs to maximize TB reuse when vCPUs have the same tracing state.

This feature is later used by tracetool to optimize tracing of guest
code events.

The maximum number of TB sets is defined as 2^E, where E is the number
of events that have the 'vcpu' property (their state is stored in
CPUState->trace_dstate).

For this to work, a change on the dynamic tracing state of a vCPU will
force it to flush its virtual TB cache (which is only indexed by
address), and fall back to the physical TB cache (which now contains the
vCPU's dynamic tracing state as part of the hashing function).

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-id: 149915775266.6295.10060144081246467690.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 13:11:05 +01:00
Lluís Vilanova
d43811165d trace: [tcg] Delay changes to dynamic state when translating
This keeps consistency across all decisions taken during translation
when the dynamic state of a vCPU is changed in the middle of translating
some guest code.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-id: 149915750615.6295.3713699402253529487.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 13:10:54 +01:00
Lluís Vilanova
d01c05c955 trace: Allocate cpu->trace_dstate in place
There's little point in dynamically allocating the bitmap if we
know at compile-time the max number of events we want to support.
Thus, make room in the struct for the bitmap, which will make things
easier later: this paves the way for upcoming changes, in which
we'll use a u32 to fully capture cpu->trace_dstate.

This change also increases performance by saving a dereference and
improving locality--note that this is important since upcoming work
makes reading this bitmap fairly common.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Lluís Vilanova <vilanova@ac.upc.edu>
Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 149915725977.6295.15069969323605305641.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-17 13:10:45 +01:00
Zhang Chen
dc3c5ac645 net/filter-mirror.c: Introduce parameter for filter_send()
This patch changes the filter_send() parameter from CharBackend to MirrorState,
so we can get more information like vnet_hdr (we use it to support packets with a vnet header).

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:02:11 +08:00
Zhang Chen
3cde5ea211 net/net.c: Add vnet_hdr support in SocketReadState
We add a flag to decide whether net_fill_rstate() needs to read
the vnet_hdr_len or not.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:02:11 +08:00
Zhang Chen
d6b732e953 net: Add vnet_hdr_len arguments in NetClientState
Add vnet_hdr_len to NetClientState
so that other modules can get the real vnet_hdr_len easily.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-07-17 20:02:09 +08:00
Peter Maydell
77031ee1ce Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170717' into staging
ppc patch queue 2017-07-17

This pull request supersedes the one from 2017-07-14.  That one had a
couple of subtle regressions: there was a build error for mingw32, and
an instance_size which was theoretically wrong everywhere, but only
actually bit on the Travis OSX build.

There are two major batches in this set, rather than the usual
collection of assorted fixes.

    * More DRC cleanup.  This gets the state management into a state
      which should fix many of the hotplug+migration problems we've
      had.  Plus it gets the migration stream format into something
      well defined and pretty minimal which we can reasonably support
      into the future.

    * Hashed Page Table resizing.  It's been a while since this was
      posted, but it's been through several previous rounds of review.
      The kernel parts (both guest and host) are merged in 4.11, so
      this is the only remaining piece left to allow resizing of the
      HPT in a running guest.

There are also a handful of unrelated fixes.

# gpg: Signature made Mon 17 Jul 2017 07:36:52 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.10-20170717: (21 commits)
  target/ppc: fix CPU hotplug when radix is enabled (TCG)
  spapr: fix memory leak in spapr_core_pre_plug()
  pseries: Allow HPT resizing with KVM
  pseries: Use smaller default hash page tables when guest can resize
  pseries: Enable HPT resizing for 2.10
  pseries: Implement HPT resizing
  pseries: Stubs for HPT resizing
  ppc/pnv: Remove unused XICSState reference
  spapr: fix potential memory leak in spapr_core_plug()
  spapr: Implement DR-indicator for physical DRCs only
  spapr: Remove sPAPRConfigureConnectorState sub-structure
  spapr: Consolidate DRC state variables
  spapr: Cleanups relating to DRC awaiting_release field
  spapr: Refactor spapr_drc_detach()
  spapr: Abort on delete failure in spapr_drc_release()
  spapr: Simplify unplug path
  spapr: Remove 'awaiting_allocation' DRC flag
  spapr: Treat devices added before inbound migration as coldplugged
  spapr: Minor cleanups to events handling
  spapr: migrate pending_events of spapr state
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 12:52:59 +01:00
Peter Maydell
6632f6ff96 Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging
# gpg: Signature made Mon 17 Jul 2017 04:47:05 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/block-and-testing-pull-request:
  travis: add no-TCG build
  docker.py: Improve subprocess exit code handling
  docker.py: Drop infile parameter
  docker: Don't enable networking as a side-effect of DEBUG=1
  ssh: support I/O from any AioContext
  sheepdog: add queue_lock
  qed: protect table cache with CoMutex
  qed: introduce bdrv_qed_init_state
  block: invoke .bdrv_drain callback in coroutine context and from AioContext
  qed: move tail of qed_aio_write_main to qed_aio_write_{cow, alloc}
  vvfat: make it thread-safe
  vpc: make it thread-safe
  vdi: make it thread-safe
  coroutine-lock: add qemu_co_rwlock_downgrade and qemu_co_rwlock_upgrade
  qcow2: call CoQueue APIs under CoMutex

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 11:46:36 +01:00
Gerd Hoffmann
10750ee0d6 virtio-gpu: skip update cursor in post_load if we don't have one
If the cursor resource id isn't set the guest didn't define a cursor.
Skip the cursor update in post_load in that case.

Reported-by: wanghaibin <wanghaibin.wang@huawei.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Tested-by: wanghaibin <wanghaibin.wang@huawei.com>
Message-id: 20170710070432.856-1-kraxel@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:41:23 +02:00
Gerd Hoffmann
2a7f263068 ehci: add sanity check for maxframes
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20170703111549.10924-1-kraxel@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:39:08 +02:00
Gerd Hoffmann
feb47cf2fa keymaps: fr-ca: add missing keys
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170712072305.29233-1-kraxel@redhat.com
2017-07-17 11:36:41 +02:00
Dr. David Alan Gilbert
0a9667ecdb hmp: Update info vnc
The QMP query-vnc interfaces have gained a lot more information that
the HMP interfaces haven't got yet. Update it.

Note the output format has changed, but this is HMP so that's OK.

In particular, this now includes client information for reverse
connections:

-vnc :0
(qemu) info vnc
default:
  Server: 0.0.0.0:5900 (ipv4)
    Auth: none (Sub: none)

  (Now connect a client)

(qemu) info vnc
default:
  Server: 0.0.0.0:5900 (ipv4)
    Auth: none (Sub: none)
  Client: 127.0.0.1:51828 (ipv4)
    x509_dname: none
    sasl_username: none

-vnc localhost:7000,reverse
(qemu) info vnc
default:
  Client: ::1:7000 (ipv6)
    x509_dname: none
    sasl_username: none
  Auth: none (Sub: none)

-vnc :1,password,id=pass -vnc localhost:7000,reverse
(qemu) info vnc
default:
  Client: ::1:7000 (ipv6)
    x509_dname: none
    sasl_username: none
  Auth: none (Sub: none)
rev:
  Server: 0.0.0.0:5901 (ipv4)
    Auth: vnc (Sub: none)
  Client: 127.0.0.1:53616 (ipv4)
    x509_dname: none
    sasl_username: none

This was originally RH bz 1461682

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20170711154414.21111-1-dgilbert@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:36:09 +02:00
Alexander Graf
d3b0db6dfe vnc: Set default kbd delay to 10ms
The current VNC default keyboard delay is 1ms. With that we're constantly
typing faster than the guest receives keyboard events from an XHCI attached
USB HID device.

The default keyboard delay time in the input layer however is 10ms. I don't know
how that number came to be, but empirical tests on some OpenQA driven ARM
systems show that 10ms really is a reasonable default number for the delay.

This patch moves the VNC delay also to 10ms. That way our default is much
safer (good!) and also consistent with the input layer default (also good!).

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1499863425-103133-1-git-send-email-agraf@suse.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:35:27 +02:00
Hervé Poussineau
639b49ef9a audio/adlib: remove limitation of one adlib card
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170621043401.19842-3-hpoussin@reactos.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:09:02 +02:00
Hervé Poussineau
c57fbf50e7 audio/fmopl: modify timer callback to give opaque and channel parameters in two arguments
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170621043401.19842-2-hpoussin@reactos.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:09:02 +02:00
Peng Hao
facd0e9773 audio: st_rate_flow exist a infinite loop
If a voice recording device is kept open for a long time (several days)
in a Windows guest, rate->ipos will overflow and rate->opos will never
have a chance to change. This results in an infinite loop.

Signed-off-by: Peng Hao <peng.hao2@zte.com.cn>
Signed-off-by: Wang Yechao <wang.yechao255@zte.com.cn>
Message-id: 1500128061-20849-1-git-send-email-peng.hao2@zte.com.cn
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:08:59 +02:00
Peter Maydell
acbaa0f4fd Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging
slirp updates

# gpg: Signature made Sat 15 Jul 2017 13:30:03 BST
# gpg:                using RSA key 0xB0A51BF58C9179C5
# gpg: Good signature from "Samuel Thibault <samuel.thibault@aquilenet.fr>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@u-bordeaux.fr>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: AEBF 7448 FAB9 453A 4552  390E B0A5 1BF5 8C91 79C5

* remotes/thibault/tags/samuel-thibault:
  slirp: Handle error returns from sosendoob()
  slirp: Handle error returns from slirp_send() in sosendoob()
  slirp: fork_exec(): Don't close() a negative number in fork_exec()
  slirp: use DIV_ROUND_UP

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-17 10:02:22 +01:00
Gerd Hoffmann
18375b5c16 ipxe: update to commit 0600d3ae94
Rebase ipxe to latest git master.
Pick up four virtio-net fixes.

complete shortlog of ipxe changes
---------------------------------

Adamczyk, Konrad (1):
      [thunderx] Use ThunderxConfigProtocol to obtain board configuration

Bartosz Szczepanek (1):
      [thunderx] Fix hardware deinitialization

Christian Nilsson (1):
      [intel] Add INTEL_NO_PHY_RST for I219-LM (2)

David Decotigny (2):
      [build] Return const char * from uuid_ntoa()
      [af_packet] Add new AF_PACKET driver for Linux

Jason Wang (1):
      [virtio] Support VIRTIO_NET_F_IOMMU_PLATFORM

Jerone Young (1):
      [intel] Add support for I219-V in 7th Gen Intel NUC

Konrad Adamczyk (1):
      [thunderx] Don't disable NIC when exiting from iPXE

Ladi Prosek (3):
      [virtio] Cap queue size to MAX_QUEUE_NUM
      [virtio] Simplify virtqueue shutdown
      [virtio] Remove queue size limit in legacy virtio

Martin Habets (1):
      [sfc] Add driver for Solarflare SFC8XXX adapters

Michael Brown (159):
      [interface] Provide intf_reinit() to reinitialise nullified interfaces
      [iscsi] Avoid potential infinite loops during shutdown
      [efi] Add basic EFI SAN booting capability
      [undi] Allocate base memory before calling UNDI loader entry point
      [romprefix] Avoid using PMM-allocated memory in UNDI loader entry point
      [undi] Clean up driver and device name information
      [prefix] Remove impossible progress message
      [prefix] Include diagnostic information within progress messages
      [undi] Try matching UNDI ROMs in BIOS enumeration order
      [efi] Work around temporal anomaly encountered during ExitBootServices()
      [ipv4] Accept unicast packets for the local network broadcast address
      [build] Add %.vhd target for building VM bootable disk images
      [virtio] Use separate RX and TX empty header buffers
      [cloud] Add ability to retrieve Google Compute Engine metadata
      [virtio] Use host-specified MTU when available
      [netdevice] Allow MTU to be changed at runtime
      [cloud] Show CPU vendor and model in example cloud boot scripts
      [hyperv] Ignore unsolicited VMBus messages
      [pic8259] Fix definitions for "read IRR" and "read ISR" commands
      [efi] Fix building elf2efi.c when -fpic is enabled by default
      [interface] Avoid unnecessary reference counting in intf_unplug()
      [interface] Remove misleading comment
      [interface] Unplug interface before calling intf_close() in intf_shutdown()
      [netdevice] Limit MTU by hardware maximum frame length
      [cpuid] Provide cpuid_supported() to test for supported functions
      [time] Allow timer to be selected at runtime
      [hyperv] Provide timer based on the 10MHz time reference count MSR
      [int13] Avoid potential division by zero
      [int13] Test correct return status from INT 13 calls
      [settings] Add "unixtime" builtin setting to expose the current time
      [time] Report attempts to use timers before initialisation
      [interface] Provide the ability to shut down multiple interfaces
      [http] Cleanly shut down potentially looped interfaces
      [efi] Add missing SANBOOT_PROTO_HTTP to EFI default configuration
      [block] Remove spurious comments
      [block] Centralise SAN device abstraction
      [block] Centralise "san-drive" setting
      [int13] Refactor to use centralised SAN device abstraction
      [efi] Refactor to use centralised SAN device abstraction
      [block] Retry any SAN device operation
      [iscsi] Use intfs_shutdown() when shutting down multiple interfaces
      [scsi] Use intfs_shutdown() when shutting down multiple interfaces
      [block] Use intfs_shutdown() when shutting down multiple interfaces
      [scsi] Avoid duplicate calls to scsicmd_close()
      [build] Provide common ARRAY_SIZE() definition
      [efi] Update to current EDK2 headers
      [efi] Add EFI_ACPI_TABLE_PROTOCOL header and GUID definition
      [efi] Provide ACPI table description for SAN devices
      [efi] Skip cable detection at initialisation where possible
      [undi] Move PXE API caller back into UNDI driver
      [dhcp] Allow vendor class to be changed in DHCP requests
      [hermon] Avoid potential integer overflow when calculating memory mappings
      [arbel] Avoid potential integer overflow when calculating memory mappings
      [xfer] Ensure va_end() is called on failure path
      [nfs] Fix double free bug on error path
      [linda] Use correct length for memset()
      [qib7322] Use correct length for memset()
      [sis900] Remove extraneous memset() with incorrect length
      [802.11] Remove redundant NULL pointer check after dereference
      [crypto] Free correct pointer on the error path
      [librm] Fail gracefully if asked to ioremap() a zero length
      [usb] Use correct length for memcpy()
      [mucurses] Attempt to fix test for empty string
      [mucurses] Attempt to fix keypress processing logic
      [mucurses] Attempt to fix resource leaks
      [hyperv] Fix resource leaks on error path
      [slam] Fix resource leak on error path
      [slam] Avoid NULL pointer dereference in slam_pull_value()
      [eoib] Avoid passing a NULL I/O buffer to netdev_tx_complete_err()
      [http] Add missing check for memory allocation failure
      [mucurses] Attempt to fix use of uninitialised buffer with strcat()
      [xhci] Avoid accessing beyond end of endpoint context array
      [build] Avoid confusing sparse in single-argument DBG() macros
      [infiniband] Return status code from ib_create_cq() and ib_create_qp()
      [infiniband] Return status code from ib_create_mi()
      [block] Quell spurious Coverity size mismatch warning
      [ath] Add missing break statements
      [pixbuf] Avoid potential division by zero
      [usb] Use correct length for memcpy()
      [xen] Use standard calling pattern for asprintf()
      [tcp] Use correct length for memset()
      [video_subr] Use memmove() for overlapping memory copy
      [arbel] Assert that mapping length is non-zero
      [hermon] Assert that mapping length is non-zero
      [tlan] Guard against failure to identify chip
      [w89c840] Avoid potential array overrun
      [sis190] Avoid NULL pointer dereference
      [mucurses] Ensure SLK labels are always terminated
      [coverity] Add Coverity user model
      [malloc] Track maximum heap usage
      [travis] Add minimal .travis.yml file
      [travis] Build and run the unit test suite
      [travis] Integrate with Coverity Scan
      [rtl818x] Fix resource leak on error path
      [pcnet32] Eliminate redundant register read
      [iobuf] Increase minimum I/O buffer size to 128 bytes
      [vxge] Fix use of stale I/O buffer on error path
      [scsi] Avoid duplicate call to scsicmd_close() on TEST UNIT READY failure
      [block] Add dummy SAN device
      [block] Add basic multipath support
      [int13] Improve geometry guessing for unaligned partitions
      [int13con] Avoid overwriting random portions of SAN boot disks
      [time] Add sleep_fixed() function to sleep without checking for Ctrl-C
      [block] Allow SAN retry count to be reconfigured
      [block] Add a small delay between attempts to reopen SAN targets
      [block] Retry reopening indefinitely for multipath devices
      [block] Gracefully close SAN device if registration fails
      [linux] Use dummy SAN device
      [block] Ignore redundant xfer_window_changed() messages
      [block] Describe all SAN devices via ACPI tables
      [iscsi] Do not install iBFT when no iSCSI targets exist
      [http] Notify data transfer interface when underlying connection is ready
      [mucurses] Fix erroneous __nonnull attribute
      [build] Avoid implicit-fallthrough warnings on GCC 7
      [linux] Fix building with kernel 4.11 headers
      [scsi] Retry TEST UNIT READY command
      [libc] Add stdbool.h standard header
      [efi] Fix typo in efi_acpi_table_protocol_guid
      [efi] Add efi_sprintf() and efi_vsprintf()
      [block] Allow use of a non-default EFI SAN boot filename
      [intel] Show original CTRL and STATUS values in debugging output
      [intel] Do not enable ASDE on i350 backplane NIC
      [block] Provide sandev_read() and sandev_write() as global symbols
      [block] Provide abstraction to allow system to be quiesced
      [hyperv] Do not fail if guest OS ID MSR is already set
      [hyperv] Remove redundant return status code from mapping functions
      [hyperv] Cope with Windows Server 2016 enlightenments
      [efi] Standardise PCI debug messages
      [iscsi] Always send FirstBurstLength parameter
      [iscsi] Fix iBFT when no explicit initiator name setting exists
      [xen] Provide 18 4kB receive buffers to work around xen-netback bug
      [efi] Prevent EFI code from being linked in to non-EFI builds
      [tls] Keep cipherstream window open until TLS negotiation is complete
      [settings] Extend numerical setting tags to 64 bits
      [acpi] Make acpi_find_rsdt() a per-platform method
      [efi] Provide access to ACPI tables
      [acpi] Expose ACPI tables via settings mechanism
      [syslog] Handle backspace characters
      [hdprefix] Avoid attempts to read beyond the end of the disk
      [usb] Allow for USB network devices with no interrupt endpoint
      [build] Use -no-pie on newer versions of gcc
      [ecm] Display invalid MAC address strings in debug messages
      [cpuid] Allow input %ecx value to be specified
      [crypto] Expose RSA_CTX_SIZE constant
      [crypto] Expose asn1_grow()
      [crypto] Provide asn1_built() to construct a cursor from a builder
      [crypto] Expose pem_asn1() for use with non-image data
      [exanic] Add driver for Exablaze ExaNIC cards
      [usb] Use non-zero language ID to retrieve strings
      [mucurses] Avoid potential division by zero
      [tls] Support RFC5746 secure renegotiation
      [smscusb] Abstract out common SMSC USB device functionality
      [smsc95xx] Use common SMSC USB device functionality
      [smsc75xx] Use common SMSC USB device functionality
      [smscusb] Add ability to read MAC address from OTP
      [smscusb] Move non-inline register access functions to smscusb.c
      [smscusb] Allow for alternative PHY register layouts
      [smsc75xx] Expose functionality shared with LAN78xx devices
      [lan78xx] Add driver for Microchip LAN78xx USB Ethernet NICs

Mika Tiainen (1):
      [intel] Add INTEL_NO_PHY_RST for I219-V

Mike McCormack (1):
      [sky2] Use 32-bit read to read Y2_VAUX_AVAIL

Raed Salem (2):
      [golan] Update Connect-IB, ConnectX-4 and ConnectX-4 Lx (Infiniband) support
      [golan] Bug fixes and improved paging allocation method

Vishvananda Ishaya (1):
      [intel] Reset all virtual function settings

Vishvananda Ishaya Abrams (1):
      [iscsi] Don't close when receiving NOP-In

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-07-17 11:00:28 +02:00
Cédric Le Goater
346ebfc6fb target/ppc: fix CPU hotplug when radix is enabled (TCG)
But when a guest initializes radix mode, it issues a H_REGISTER_PROC_TBL
to update the LPCR of all CPUs. Hot-plugged CPUs inherit from the same
setting under KVM but not under TCG. So, let's check for radix and update
the default LPCR to keep new CPUs in sync.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
Greg Kurz
df8658de43 spapr: fix memory leak in spapr_core_pre_plug()
In case of error, we must ensure the dynamically allocated base_core_type
is freed, like it is done everywhere else in this function.

This is a regression introduced in QEMU 2.9 by commit 8149e2992f.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
David Gibson
b55d295e3e pseries: Allow HPT resizing with KVM
So far, qemu implements the PAPR Hash Page Table (HPT) resizing extension
with TCG.  The same implementation will work with KVM PR, but we don't
currently allow that.  For KVM HV we can only implement resizing with the
assistance of the host kernel, which needs a new capability and ioctl()s.

This patch adds support for testing the new KVM capability and implementing
the resize in terms of KVM facilities when necessary.  If we're running on
a kernel which doesn't have the new capability flag at all, we fall back to
testing for PR vs. HV KVM using the same hack that we already use in a
number of places for older kernels.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
David Gibson
2772cf6be9 pseries: Use smaller default hash page tables when guest can resize
We've now implemented a PAPR extension allowing PAPR guest to resize
their hash page table (HPT) during runtime.

This patch makes use of that facility to allocate smaller HPTs by default.
Specifically when a guest is aware of the HPT resize facility, qemu sizes
the HPT to the initial memory size, rather than the maximum memory size on
the assumption that the guest will resize its HPT if necessary for hot
plugged memory.

When the initial memory size is much smaller than the maximum memory size
(a common configuration with e.g. oVirt / RHEV) then this can save
significant memory on the HPT.

If the guest does *not* advertise HPT resize awareness when it makes the
ibm,client-architecture-support call, qemu resizes the HPT for maximum
memory size (unless it's been configured not to allow such guests at all).

For now we make that reallocation assuming the guest has not yet used the
HPT at all.  That's true in practice, but not, strictly, an architectural
or PAPR requirement.  If we need to in future we can fix this by having
the client-architecture-support call reboot the guest with the revised
HPT size (the client-architecture-support call is explicitly permitted to
trigger a reboot in this way).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-07-17 15:07:05 +10:00
David Gibson
52b81ab5e9 pseries: Enable HPT resizing for 2.10
We've now implemented a PAPR extension which allows PAPR guests (i.e.
"pseries" machine type) to resize their hash page table during runtime.

However, that extension is only enabled if explicitly chosen on the
command line.  This patch enables it by default for spapr-2.10, but leaves
it disabled (by default) for older machine types.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-07-17 15:07:05 +10:00
David Gibson
0b0b831016 pseries: Implement HPT resizing
This patch implements hypercalls allowing a PAPR guest to resize its own
hash page table.  This will eventually allow for more flexible memory
hotplug.

The implementation is partially asynchronous, handled in a special thread
running the hpt_prepare_thread() function.  The state of a pending resize
is stored in SPAPR_MACHINE->pending_hpt.

The H_RESIZE_HPT_PREPARE hypercall will kick off creation of a new HPT, or,
if one is already in progress, monitor it for completion.  If there is an
existing HPT resize in progress that doesn't match the size specified in
the call, it will cancel it, replacing it with a new one matching the
given size.

The H_RESIZE_HPT_COMMIT completes transition to a resized HPT, and can only
be called successfully once H_RESIZE_HPT_PREPARE has successfully
completed initialization of a new HPT.  The guest must ensure that there
are no concurrent accesses to the existing HPT while this is called (this
effectively means stop_machine() for Linux guests).

For now H_RESIZE_HPT_COMMIT goes through the whole old HPT, rehashing each
HPTE into the new HPT.  This can have quite high latency, but it seems to
be of the order of typical migration downtime latencies for HPTs of size
up to ~2GiB (which would be used in a 256GiB guest).

In future we probably want to move more of the rehashing to the "prepare"
phase, by having H_ENTER and other hcalls update both current and
pending HPTs.  That's a project for another day, but should be possible
without any changes to the guest interface.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
David Gibson
30f4b05bd0 pseries: Stubs for HPT resizing
This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
extension to allow run time resizing of a guest's hash page table.  It
also adds a new machine property for controlling whether this new
facility is available.

For now we only allow resizing with TCG, allowing it with KVM will require
kernel changes as well.

Finally, it adds a new string to the hypertas property in the device
tree, advertising to the guest the availability of the HPT resizing
hypercalls.  This is a tentative suggested value, and would need to be
standardized by PAPR before being merged.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-07-17 15:07:05 +10:00
Alexey Kardashevskiy
2ee77040f5 ppc/pnv: Remove unused XICSState reference
e6f7e110ee "ppc/xics: remove the XICSState classes" got rid of
XICSState, this is just a leftover.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
Greg Kurz
e49c63d5b3 spapr: fix potential memory leak in spapr_core_plug()
Since commit 5c1da81215 ("spapr: Remove unnecessary differences between
hotplug and coldplug paths"), the CPU DT for the DRC is always allocated.
This causes a memory leak for pseries-2.6 and older machine types, that
don't support CPU hotplug and don't allocate DRCs for CPUs.

Reported-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
David Gibson
67fea71bf3 spapr: Implement DR-indicator for physical DRCs only
According to PAPR, the DR-indicator should only be valid for physical DRCs,
not logical DRCs.  At the moment we implement it for all DRCs, so restrict
it to physical ones only.

We move the state to the physical DRC subclass, which means adding some
QOM boilerplate to handle the newly distinct type.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
David Gibson
4445b1d27e spapr: Remove sPAPRConfigureConnectorState sub-structure
Most of the time, the state of a DRC object is contained in the single
'state' variable.  However, the transition from UNISOLATE to
CONFIGURED state requires multiple calls to the ibm,configure-connector
RTAS call to retrieve the device tree for the attached device.  We need
some extra state to keep track of where we're up to in delivering the
device tree information to the guest.

Currently that extra state is in a sPAPRConfigureConnectorState
substructure which is only allocated when we're in the middle of the
configure connector process.  That sounds like a good idea, but the extra
state is only two integers - on many platforms that will take up the same
room as the (maybe NULL) ccs pointer even before malloc() overhead.  Plus
it's another object whose lifetime we need to manage.  In short, it's not
worth it.

So, fold the sPAPRConfigureConnectorState substructure directly into the
DRC object.

Previously the structure was allocated lazily when the configure-connector
call discovers it's not there.  Now, we need to initialize the subfields
pre-emptively, as soon as we enter UNISOLATE state.

Although it's not strictly necessary (the field values should only ever
be consulted when in UNISOLATE state), we try to keep them at -1 when in
other states, as a debugging aid.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
David Gibson
9d4c0f4f0a spapr: Consolidate DRC state variables
Each DRC has three fields describing its state: isolation_state,
allocation_state and configured.  At first this seems like a reasonable
representation, since it's based directly on the PAPR defined
isolation-state and allocation-state indicators.  However:
  * Only a few combinations of the two fields' values are permitted
  * allocation_state isn't used at all for physical DRCs
  * The indicators are write only so they don't really have a well
    defined current value independent of each other

This replaces these variables with a single state variable, whose names
and numbers are based on the diagram in LoPAPR section 13.4.  Along with
this we add code to check the current state on various operations and make
sure the requested transition is permitted.

Strictly speaking, this makes guest visible changes to behaviour (since we
probably allowed some transitions we shouldn't have before).  However, a
hypothetical guest broken by that wasn't PAPR compliant, and probably
wouldn't have worked under PowerVM.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
David Gibson
f1c52354e5 spapr: Cleanups relating to DRC awaiting_release field
'awaiting_release' indicates that the host has requested an unplug of the
device attached to the DRC, but the guest has not (yet) put the device
into a state where it is safe to complete removal.

1. Rename it to 'unplug_requested' which to me at least is clearer

2. Remove the ->release_pending() method used to check this from outside
spapr_drc.c.  The method only plausibly has one implementation, so use
a plain function (spapr_drc_unplug_requested()) instead.

3. Remove it from the migration stream.  Attempting to migrate mid-unplug
is broken not just for spapr - in general management has no good way to
determine if the device should be present on the destination or not.  So,
until that's fixed, there's no point adding extra things to the stream.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
David Gibson
a8dc47fd82 spapr: Refactor spapr_drc_detach()
This function has two unused parameters - remove them.

It also sets awaiting_release on all paths, except one.  On that path
setting it is harmless, since it will be immediately cleared by
spapr_drc_release().  So factor it out of the if statements.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
David Gibson
ba50822ff8 spapr: Abort on delete failure in spapr_drc_release()
We currently ignore errors from the object_property_del() in
spapr_drc_release().  But the only way that could fail is if the property
doesn't exist, in which case it's a bug that we're in spapr_drc_release()
at all.  So change from ignoring to abort()ing on errors.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
David Gibson
765d1bdda5 spapr: Simplify unplug path
spapr_lmb_release() and spapr_core_release() call hotplug_handler_unplug()
which after a bunch of indirection calls spapr_memory_unplug() or
spapr_core_unplug().  But we already know which is the appropriate thing
to call here, so we can just fold it directly into the release function.

Once that's done, there's no need for an hc->unplug method in the spapr
machine at all: since we also have an hc->unplug_request method, the
hotplug core will never use ->unplug.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
David Gibson
82a93a1d30 spapr: Remove 'awaiting_allocation' DRC flag
The awaiting_allocation flag in the DRC was introduced by aab9913
"spapr_drc: Prevent detach racing against attach for CPU DR", allegedly to
prevent a guest crash on racing attach and detach.  Except.. information
from the BZ actually suggests a qemu crash, not a guest crash.  And there
shouldn't be a problem here anyway: if the guest has already moved the DRC
away from UNUSABLE state, the detach would already be deferred, and if it
hadn't it should be safe to detach it (the guest should fail gracefully
when it attempts to change the allocation state).

I think this was probably just a bandaid for some other problem in the
state management.  So, remove awaiting_allocation and associated code.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
Laurent Vivier
94fd9cbaa3 spapr: Treat devices added before inbound migration as coldplugged
When migrating a guest which has already had devices hotplugged,
libvirt typically starts the destination qemu with -incoming defer,
adds those hotplugged devices with qmp, then initiates the incoming
migration.

This causes problems for the management of spapr DRC state.  Because
the device is treated as hotplugged, it goes into a DRC state for a
device immediately after it's plugged, but before the guest has
acknowledged its presence.  However, chances are the guest on the
source machine *has* acknowledged the device's presence and configured
it.

If the source has fully configured the device, then DRC state won't be
sent in the migration stream: for maximum migration compatibility with
earlier versions we don't migrate DRCs in coldplug-equivalent state.
That means that the DRC effectively changes state over the migrate,
causing problems later on.

In addition, logging hotplug events for these devices isn't what we
want because a) those events should already have been issued on the
source host and b) the event queue should get wiped out by the
incoming state anyway.

In short, what we really want is to treat devices added before an
incoming migration as if they were coldplugged.

To do this, we first add a spapr_drc_hotplugged() helper which
determines if the device is hotplugged in the sense relevant for DRC
state management.  We only send hotplug events when this is true.
Second, when we add a device which isn't hotplugged in this sense, we
force a reset of the DRC state - this ensures the DRC is in a
coldplug-equivalent state (there isn't usually a system reset between
these device adds and the incoming migration).

This is based on an earlier patch by Laurent Vivier, cleaned up and
extended.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-07-17 15:07:05 +10:00
David Gibson
5341258e86 spapr: Minor cleanups to events handling
The rtas_error_log structure is marked packed, which strongly suggests its
precise layout is important to match an external interface.  Along with
that one could expect it to have a fixed endianness to match the same
interface.  That used to be the case - matching the layout of PAPR RTAS
event format and requiring BE fields.

Now, however, it's only used embedded within sPAPREventLogEntry with the
fields in native order, since they're processed internally.

Clear that up by removing the nested structure in sPAPREventLogEntry.
struct rtas_error_log is moved back to spapr_events.c where it is used as
a temporary to help convert the fields in sPAPREventLogEntry to the correct
in memory format when delivering an event to the guest.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
Daniel Henrique Barboza
fd38804b38 spapr: migrate pending_events of spapr state
In racing situations between hotplug events and a migration operation,
an rtas hotplug event might not yet have been delivered to the source
guest when migration is started. In this case the pending_events of
spapr state need to be transmitted to the target so that the hotplug
event can be finished on the target.

To achieve the minimal VMSD possible to migrate the pending_events list,
this patch makes the changes in spapr_events.c:

- 'log_type' of sPAPREventLogEntry struct deleted. This information can be
derived by inspecting the rtas_error_log summary field. A new function
called 'spapr_event_log_entry_type' was added to retrieve the type of
a given sPAPREventLogEntry.

- sPAPREventLogEntry, epow_log_full and hp_log_full were redesigned. The
only data we're going to migrate in the VMSD is the event log data itself,
which can be divided in two parts: a rtas_error_log header and an extended
event log field. The rtas_error_log header contains information about the
size of the extended log field, which can be used inside VMSD as the size
parameter of the VBUFFER_ALLOC field that will store it. To allow this use,
the header.extended_length field must be exposed inline to the VMSD instead
of embedded into a 'data' field that holds everything. With this in mind,
the following changes were done:

    * a new 'header' field was added to sPAPREventLogEntry. This field holds
a struct rtas_error_log inline.
    * the declaration of the 'rtas_error_log' struct was moved to spapr.h
to be visible to the VMSD macros.
    * 'data' field of sPAPREventLogEntry was renamed to 'extended_log' and
now holds only the contents of the extended event log.
   *  'struct rtas_error_log hdr' was taken away from both epow_log_full
and hp_log_full. This information is now available at the header field of
sPAPREventLogEntry.
   * epow_log_full and hp_log_full were renamed to epow_extended_log and
hp_extended_log respectively. This rename makes it clearer to understand
the new purpose of both structures: hold the information of an extended
event log field.
    * spapr_powerdown_req and spapr_hotplug_req_event now create a
sPAPREventLogEntry structure that contains the full rtas log entry.
    * rtas_event_log_queue and rtas_event_log_dequeue now receive a
sPAPREventLogEntry pointer as a parameter instead of a void pointer.

- the endianness of the sPAPREventLogEntry header is now native instead
of be32. We can use the fields in native endianness internally and write
them in be32 in the guest physical memory inside 'check_exception'. This
allows the VMSD inside spapr.c to read the correct size of the
extended_log field.

- inside spapr.c, pending_events is put in a subsection in the spapr state
VMSD to make sure migration across different versions is not broken.

A small change in rtas_event_log_queue and rtas_event_log_dequeue was also
made: instead of calling qdev_get_machine(), both functions now receive
a pointer to the sPAPRMachineState. This pointer is already available in
the callers of these functions and we don't need to waste resources
calling qdev() again.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:07:05 +10:00
David Gibson
3579d606a0 spapr: Remove unnecessary instance_size specifications from DRC subtypes
All the DRC subtypes explicitly list instance_size in TypeInfo (all as
sizeof(sPAPRDRConnector)).  This isn't necessary, since if it's not listed
it will be derived from the parent type.

Worse, this is dangerous, because if a subtype is changed in future to
have a larger structure, then subtypes of that subtype also need to have
instance_size changed, or it will lead to hard to track memory corruption
bugs.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-17 15:06:08 +10:00
Paolo Bonzini
978373143c travis: add no-TCG build
It's fairly easy for --disable-tcg to bitrot.  Test it in our CI.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170714093016.10897-1-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:34:20 +08:00
Fam Zheng
0b95ff72cb docker.py: Improve subprocess exit code handling
A few error handlings are missing because we ignore the subprocess exit
code, for example "docker build" errors are currently ignored.

Introduce _do_check() aside the existing _do() method and use it in a
few places.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170712075528.22770-3-famz@redhat.com>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:34:20 +08:00
Fam Zheng
58bf7b6d8c docker.py: Drop infile parameter
The **kwargs can do this just well.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170712075528.22770-2-famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:34:20 +08:00
Daniel P. Berrange
8a2390a4f4 docker: Don't enable networking as a side-effect of DEBUG=1
When trying to debug problems with tests it is natural to set
DEBUG=1 when starting the docker environment. Unfortunately
this has a side-effect of enabling an eth0 network interface
in the container, which changes the operating environment of
the test suite. IOW tests which fail may suddenly start
working again if DEBUG=1 is set, due to changed network setup.

Add a separate NETWORK variable to allow enablement of
networking separately from DEBUG=1. This can be used in two
ways. To enable the default docker network backend

  make docker-test-build@fedora NETWORK=1

while to enable a specific network backend, eg join the network
associated with the container 'wibble':

  make docker-test-build@fedora NETWORK=container:wibble

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <20170713144352.2212-1-berrange@redhat.com>
[Drop the superfluous second $(subst ...). - Fam]
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:34:20 +08:00
Paolo Bonzini
5aca18a4ff ssh: support I/O from any AioContext
The coroutine may run in a different AioContext, causing the
fd handler to busy wait.  Fix this by resetting the handler
in restart_coroutine, before the coroutine is restarted.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-12-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:34:20 +08:00
Paolo Bonzini
f1af3251f8 sheepdog: add queue_lock
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-11-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:34:20 +08:00
Paolo Bonzini
1f01e50b83 qed: protect table cache with CoMutex
This makes the driver thread-safe.  The CoMutex is dropped temporarily
while accessing the data clusters or the backing file.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-10-pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:34:11 +08:00
Paolo Bonzini
61c7887e0f qed: introduce bdrv_qed_init_state
This will be used in the next patch, which will call bdrv_qed_do_open
with a CoMutex taken.  bdrv_qed_init_state provides a nice place to
initialize it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-9-pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:33:11 +08:00
Paolo Bonzini
61124f03ab block: invoke .bdrv_drain callback in coroutine context and from AioContext
This will let the callback take a CoMutex in the next patch.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-8-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:28:15 +08:00
Paolo Bonzini
e7569c1829 qed: move tail of qed_aio_write_main to qed_aio_write_{cow, alloc}
This part is never called for in-place writes, move it away to avoid
the "backwards" coding style typical of callback-based code.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-7-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:28:15 +08:00
Paolo Bonzini
254aee4dbb vvfat: make it thread-safe
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-6-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:28:15 +08:00
Paolo Bonzini
778b087e51 vpc: make it thread-safe
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-5-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:28:15 +08:00
Paolo Bonzini
1e88663979 vdi: make it thread-safe
The VirtualBox driver is using a mutex to order all allocating writes,
but it is not protecting accesses to the bitmap because they implicitly
happen under the AioContext mutex.  Change this to use a CoRwlock
explicitly.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-4-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:28:15 +08:00
Paolo Bonzini
667221c10d coroutine-lock: add qemu_co_rwlock_downgrade and qemu_co_rwlock_upgrade
These functions are more efficient in the presence of contention.
qemu_co_rwlock_downgrade also guarantees not to block, which may
be useful in some algorithms too.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-3-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:28:15 +08:00
Paolo Bonzini
a8c57408cd qcow2: call CoQueue APIs under CoMutex
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-2-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-07-17 11:28:15 +08:00
Peter Maydell
75cb298d90 slirp: Handle error returns from sosendoob()
sosendoob() can return a failure code, but all its callers ignore it.
This is OK in sbappend(), as the comment there states -- we will try
again later in sowrite(). Add a (void) cast to tell Coverity so.
In sowrite() we do need to check the return value -- we should handle
a write failure in sosendoob() the same way we handle a write failure
for the normal data.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-07-15 14:28:25 +02:00
Peter Maydell
0b466065eb slirp: Handle error returns from slirp_send() in sosendoob()
The code in sosendoob() assumes that slirp_send() always
succeeds, but it might return an OS error code (for instance
if the other end has disconnected). Catch these and return to
the caller either -1 on error or the number of urgent bytes
actually written. (None of the callers check this return
value currently, though.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-07-15 14:28:25 +02:00
Peter Maydell
12dccfe4f5 slirp: fork_exec(): Don't close() a negative number in fork_exec()
In a fork_exec() error path we try to closesocket(s) when s might
be a negative number because the thing that failed was the
qemu_socket() call. Add a guard so we don't do this.

(Spotted by Coverity: CID 1005727 issue 1 of 2.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-07-15 14:28:25 +02:00
Marc-André Lureau
e88718fc0b slirp: use DIV_ROUND_UP
I used the clang-tidy qemu-round check to generate the fix:
https://github.com/elmarco/clang-tools-extra

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-07-15 14:28:25 +02:00
Peter Maydell
2286468fac docs/devel/memory.txt: Add section about RAM migration
Add a section to docs/devel/memory.txt about migration of
the backing memory for RAM regions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-12-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
ce66d778cd hw/display/qxl.c Use memory_region_init_ram()
Switch to memory_region_init_ram(), since we pass the same DeviceState
to both memory_region_init_ram_nomigrate() and vmstate_register_ram().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-11-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
fefa92565f hw/pci/pci.c: Use memory_region_init_rom()
Since we pass the same DeviceState object to
memory_region_init_rom_nomigrate() and vmstate_register_ram(), we can
switch to using memory_region_init_rom() instead.

(This isn't entirely obvious from the code since it is using
&pdev->qdev rather than DEVICE(pdev) for some reason, but
PCIDevice does indeed use 'qdev' for its parent DeviceState member.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-10-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
bba3ddf72e hw/block/pflash_cfi01, pflash_cfi02: Use memory_region_init_rom_device()
Since we pass the same DeviceState object to
memory_region_init_rom_device_nomigrate() and vmstate_register_ram(),
we can switch to using memory_region_init_rom_device() instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-9-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
98a99ce084 hw: Use new memory_region_init_{ram, rom, rom_device}() functions
Use the new functions memory_region_init_{ram,rom,rom_device}()
instead of manually calling the _nomigrate() version and then
vmstate_register_ram_global().

Patch automatically created using coccinelle script:
 spatch --in-place -sp_file scripts/coccinelle/memory-region-init-ram.cocci -dir hw

(As it turns out, there are no instances of the rom and
rom_device functions that are caught by this script.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-8-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
96d851a64b scripts/coccinelle/memory-region-init-ram.cocci: New script
Add a coccinelle script that can be used to automatically convert
manual sequences of
 memory_region_init_ram_nomigrate()
 vmstate_register_ram{,_global}()
to use the new
 memory_region_init_ram()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-7-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
b08199c6fb memory.h: Add memory_region_init_{ram, rom, rom_device}() handling migration
Add new utility functions which both initialize a RAM
MemoryRegion and arrange for its contents to be migrated;
we give these the memory_region_init_ram(), memory_region_init_rom()
and memory_region_init_rom_device() names that we just freed up
by renaming the old implementations to _nomigrate().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-6-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
b59821a95b memory: Rename memory_region_init_rom() and _rom_device() to _nomigrate()
Rename memory_region_init_rom() to memory_region_init_rom_nomigrate()
and memory_region_init_rom_device() to
memory_region_init_rom_device_nomigrate().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-5-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
1cfe48c1ce memory: Rename memory_region_init_ram() to memory_region_init_ram_nomigrate()
Rename memory_region_init_ram() to memory_region_init_ram_nomigrate().
This leaves the way clear for us to provide a memory_region_init_ram()
which does handle migration.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-4-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
a5c0234bb2 memory: Document that the RAM MR initializers do not handle migration
The various functions for initializing RAM MemoryRegions do not do
anything to cause the data in the MemoryRegion to be migrated.
Note in their documentation comments that this is the responsibility
of the caller.

(We will shortly add a new function that *does* do this for you.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-3-git-send-email-peter.maydell@linaro.org
2017-07-14 17:59:42 +01:00
Peter Maydell
09ad643823 include/hw/boards.h: Document memory_region_allocate_system_memory()
Add a documentation comment for memory_region_allocate_system_memory().

In particular, the reason for this function's existence and the
requirement on board code to call it exactly once are non-obvious.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1499438577-7674-2-git-send-email-peter.maydell@linaro.org
2017-07-14 17:47:52 +01:00
Michael S. Tsirkin
4871b51b92 vmgenid-test: use boot-sector infrastructure
There's no requirement for RSDP to be installed last
by the firmware, so in rare cases vmgen id test hits
a race: RSDP is there but VM GEN ID isn't.

To fix, switch to common boot sector infrastructure.

Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Ben Warren <ben@skyportsystems.com>
Message-id: 1500046217-24597-1-git-send-email-mst@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-14 17:03:03 +01:00
Peter Maydell
23f87b9973 Merge remote-tracking branch 'remotes/berrange/tags/pull-sockets-2017-07-11-3' into staging
Merge sockets 2017/07/11 v3

# gpg: Signature made Fri 14 Jul 2017 16:09:03 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-sockets-2017-07-11-3:
  io: preserve ipv4/ipv6 flags when resolving InetSocketAddress
  sockets: ensure we don't accept IPv4 clients when IPv4 is disabled
  sockets: don't block IPv4 clients when listening on "::"
  sockets: ensure we can bind to both ipv4 & ipv6 separately

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-14 16:13:30 +01:00
Daniel P. Berrange
563a3987b9 io: preserve ipv4/ipv6 flags when resolving InetSocketAddress
The original InetSocketAddress struct may have has_ipv4 and
has_ipv6 fields set, which will control both the ai_family
used during DNS resolution, and later use of the V6ONLY
flag.

Currently the standalone DNS resolver code drops the
has_ipv4 & has_ipv6 flags after resolving, which means
the later bind() code won't correctly set V6ONLY.

This fixes the following scenarios

  -vnc :0,ipv4=off
  -vnc :0,ipv6=on
  -vnc :::0,ipv4=off
  -vnc :::0,ipv6=on

which all mistakenly accepted IPv4 clients

Acked-by: Gerd Hoffmann <kraxel@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-14 14:28:29 +01:00
Daniel P. Berrange
94bc0d1978 sockets: ensure we don't accept IPv4 clients when IPv4 is disabled
Currently if you disable listening on IPv4 addresses, via the
CLI flag ipv4=off, we still mistakenly accept IPv4 clients via
the IPv6 listener socket due to IPV6_V6ONLY flag being unset.

We must ensure IPV6_V6ONLY is always set if ipv4=off

This fixes the following scenarios

  -incoming tcp::9000,ipv6=on
  -incoming tcp:[::]:9000,ipv6=on
  -chardev socket,id=cdev0,host=,port=9000,server,nowait,ipv4=off
  -chardev socket,id=cdev0,host=,port=9000,server,nowait,ipv6=on
  -chardev socket,id=cdev0,host=::,port=9000,server,nowait,ipv4=off
  -chardev socket,id=cdev0,host=::,port=9000,server,nowait,ipv6=on

which all mistakenly accepted IPv4 clients

Acked-by: Gerd Hoffmann <kraxel@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-14 14:28:29 +01:00
Daniel P. Berrange
4dc5d815c4 sockets: don't block IPv4 clients when listening on "::"
When inet_parse() parses the hostname, it is forcing the
has_ipv6 && ipv6 flags if the address contains a ":". This
means that if the user had set the ipv4=on flag, to try to
restrict the listener to just ipv4, an error would not have
been raised.  eg

   -incoming tcp:[::]:9000,ipv4

should have raised an error because listening for IPv4
on "::" is a non-sensical combination. With this removed,
we now call getaddrinfo() on "::" passing PF_INET and
so getaddrinfo reports an error about the hostname being
incompatible with the requested protocol:

 qemu-system-x86_64: -incoming tcp:[::]:9000,ipv4: address resolution
    failed for :::9000: Address family for hostname not supported

Likewise it is explicitly setting the has_ipv4 & ipv4
flags when the address contains only digits + '.'. This
has no ill-effect, but also has no benefit, so is removed.

Acked-by: Gerd Hoffmann <kraxel@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-14 14:28:29 +01:00
Daniel P. Berrange
5e059be4c7 sockets: ensure we can bind to both ipv4 & ipv6 separately
When binding to an IPv6 socket we currently force the
IPV6_V6ONLY flag to off. This means that the IPv6 socket
will accept both IPv4 & IPv6 sockets when QEMU is launched
with something like

  -vnc :::1

While this is good for that case, it is bad for other
cases. For example if an empty hostname is given,
getaddrinfo resolves it to 2 addresses 0.0.0.0 and ::,
in that order. We will thus bind to 0.0.0.0 first, and
then fail to bind to :: on the same port. The same
problem can happen if any other hostname lookup causes
the IPv4 address to be reported before the IPv6 address.

When we get an IPv6 bind failure, we should re-try the
same port, but with IPV6_V6ONLY turned on again, to
avoid clash with any IPv4 listener.

This ensures that

  -vnc :1

will bind successfully to both 0.0.0.0 and ::, and also
avoid

  -vnc :1,to=2

from mistakenly using a 2nd port for the :: listener.

This is a regression due to commit 396f935 "ui: add ability to
specify multiple VNC listen addresses".

Acked-by: Gerd Hoffmann <kraxel@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-07-14 14:28:29 +01:00
Peter Maydell
fbc8ea1ed0 Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20170714' into staging
s390x/kvm/migration/cpumodel: fixes, enhancements and cleanups

- add a network boot rom for s390 (Thomas Huth)
- migration of storage attributes like the CMMA used/unused state
- PCI related enhancements - full support for aen, ais and zpci
- migration support for css with vmstates (Halil Pasic)
- cpu model enhancements for cpu features
- guarded storage support

# gpg: Signature made Fri 14 Jul 2017 11:33:04 BST
# gpg:                using RSA key 0x117BBC80B5A61C7C
# gpg: Good signature from "Christian Borntraeger (IBM) <borntraeger@de.ibm.com>"
# Primary key fingerprint: F922 9381 A334 08F9 DBAB  FBCA 117B BC80 B5A6 1C7C

* remotes/borntraeger/tags/s390x-20170714: (40 commits)
  s390x/gdb: add gs registers
  s390x/arch_dump: also dump guarded storage control block
  s390x/kvm: enable guarded storage
  s390x/kvm: Enable KSS facility for nested virtualization
  s390x/cpumodel: add esop/esop2 to z12 model
  s390x/cpumodel: we are always in zarchitecture mode
  s390x/cpumodel: wire up new hardware features
  s390x/flic: migrate ais states
  s390x/cpumodel: add zpci, aen and ais facilities
  s390x: initialize cpu firstly
  pc-bios/s390: rebuild s390-ccw.img
  pc-bios/s390: add s390-netboot.img
  pc-bios/s390-ccw: Link libnet into the netboot image and do the TFTP load
  pc-bios/s390-ccw: Add virtio-net driver code
  pc-bios/s390-ccw: Add core files for the network bootloading program
  roms/SLOF: Update submodule to latest status
  pc-bios/s390-ccw: Add code for virtio feature negotiation
  pc-bios/s390-ccw: Remove unused structs from virtio.h
  pc-bios/s390-ccw: Move byteswap functions to a separate header
  pc-bios/s390-ccw: Add a write() function for stdio
  ...

Conflicts:
	target/s390x/kvm.c

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-14 14:19:35 +01:00
Peter Maydell
6c6076662d Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* gdbstub fixes (Alex)
* IOMMU MemoryRegion subclass (Alexey)
* Chardev hotswap (Anton)
* NBD_OPT_GO support (Eric)
* Misc bugfixes
* DEFINE_PROP_LINK (minus the ARM patches - Fam)
* MAINTAINERS updates (Philippe)

# gpg: Signature made Fri 14 Jul 2017 11:06:27 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (55 commits)
  spapr_rng: Convert to DEFINE_PROP_LINK
  cpu: Convert to DEFINE_PROP_LINK
  mips_cmgcr: Convert to DEFINE_PROP_LINK
  ivshmem: Convert to DEFINE_PROP_LINK
  dimm: Convert to DEFINE_PROP_LINK
  virtio-crypto: Convert to DEFINE_PROP_LINK
  virtio-rng: Convert to DEFINE_PROP_LINK
  virtio-scsi: Convert to DEFINE_PROP_LINK
  virtio-blk: Convert to DEFINE_PROP_LINK
  qdev: Add const qualifier to PropertyInfo definitions
  qmp: Use ObjectProperty.type if present
  qdev: Introduce DEFINE_PROP_LINK
  qdev: Introduce PropertyInfo.create
  qom: enforce readonly nature of link's check callback
  translate-all: remove redundant !tcg_enabled check in dump_exec_info
  vl: fix breakage of -tb-size
  nbd: Implement NBD_INFO_BLOCK_SIZE on client
  nbd: Implement NBD_INFO_BLOCK_SIZE on server
  nbd: Implement NBD_OPT_GO on client
  nbd: Implement NBD_OPT_GO on server
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-14 12:16:09 +01:00
Christian Borntraeger
86158a2a2b s390x/gdb: add gs registers
Let's provide the guarded storage registers via gdb server.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:49 +02:00
Christian Borntraeger
21a106904b s390x/arch_dump: also dump guarded storage control block
Write the new note section of type 30b (guarded storage control block).

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:49 +02:00
Fan Zhang
62deb62d99 s390x/kvm: enable guarded storage
Introduce guarded storage support for KVM guests on s390.
We need to enable the capability, extend machine check validity,
sigp store-additional-status-at-address, and migration.

The feature is fenced for older machine type versions.

Signed-off-by: Fan Zhang <zhangfan@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:49 +02:00
Farhan Ali
c0a9cd940e s390x/kvm: Enable KSS facility for nested virtualization
If the host supports keyless subset (KSS) then first level
guest (G2) should enable KSS facility as well.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Eric Farman <farman@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:49 +02:00
Jason J. Herne
7223bccea3 s390x/cpumodel: add esop/esop2 to z12 model
Add esop and esop2 features to z12 model where esop2 was originally
introduced. Disable esop and esop2 when using compatibility machine
v2.9 or earlier.

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:49 +02:00
Jason J. Herne
075e52b816 s390x/cpumodel: we are always in zarchitecture mode
In QEMU, a guest VCPU always started in and never was able to leave
z/Architecture mode. Now we have an architected way of showing this
condition.

The SIGP SET ARCHITECTURE instruction is simply rejected. Linux as guest
seems to not care about the return value, which is a good thing.
The new handling is just like already being in z/Architecture mode.

We'll not try to fake absence of this facility, but still not indicate
the facility in case some strange CPU model turned z/Architecture off
completely (which doesn't work either way but lets us see how a
guest would react on a lack of this facility).

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:49 +02:00
Jason J. Herne
6da5c593bb s390x/cpumodel: wire up new hardware features
Some new guest features have been introduced recently. Let's wire
them up in the CPU model.

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
[split patch]
2017-07-14 12:29:49 +02:00
Yi Min Zhao
e7be8d4997 s390x/flic: migrate ais states
During migration we should transfer ais states to the target guest.
This patch introduces a subsection to kvm_s390_flic_vmstate and new
vmsd for qemu_flic. The ais states need to be migrated only when
ais is supported.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
2017-07-14 12:29:49 +02:00
Yi Min Zhao
3b00f702c2 s390x/cpumodel: add zpci, aen and ais facilities
zPCI instructions and facilities are available since IBM zEnterprise
EC12. To support z/PCI in QEMU we enable zpci, aen and ais facilities
starting with zEC12 GA1. And we always set zpci and aen bits in max cpu
model. Later they might be switched off due to applied real cpu model.
For ais bit, we only provide it in the full cpu model beginning with
zEC12 and defer its enablement in the default cpu model to a later point
in time. At the same time, disable them for 2.9 and older machines.

Because of introducing AIS facility, we could check if it's enabled to
initialize flic->ais_supported with the real value.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
2017-07-14 12:29:49 +02:00
Yi Min Zhao
3720d3356d s390x: initialize cpu firstly
By initializing the CPU first, we are able to retrieve and use the
CPU model features when initializing other subsystems or devices.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:49 +02:00
Christian Borntraeger
fb32d2385a pc-bios/s390: rebuild s390-ccw.img
rebuild after the following commits

4b996d0 pc-bios/s390-ccw: Link libnet into the netboot image and do the TFTP load
e6879a6 pc-bios/s390-ccw: Add virtio-net driver code
766500f pc-bios/s390-ccw: Add core files for the network bootloading program
f807e55 pc-bios/s390-ccw: Add code for virtio feature negotiation
b4e3b4f pc-bios/s390-ccw: Remove unused structs from virtio.h
dd3dc5e pc-bios/s390-ccw: Move byteswap functions to a separate header
a20b4fe pc-bios/s390-ccw: Add a write() function for stdio
262e07c pc-bios/s390-ccw: Move virtio-block related functions into a separate file
7438d32 pc-bios/s390-ccw: Move ebc2asc to sclp.c
8760bad pc-bios/s390-ccw: Move libc functions to separate header
c68f450 pc-bios/s390-ccw: use STRIP variable in Makefile

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Christian Borntraeger
6c5359e161 pc-bios/s390: add s390-netboot.img
It's already possible to do a network boot of an s390x guest with an
external netboot image based on a Linux installation, but it would
be much more convenient if the s390-ccw firmware supported network
booting right out of the box, without the need to assemble such an
external image first.

This is an s390-netboot.img that can be used for network booting.
You can download a combined kernel + initrd image via TFTP
by starting QEMU for example with:

 qemu-system-s390x ... -device virtio-net,netdev=n1,bootindex=1 \
       -netdev user,id=n1,tftp=/path/to/tftp,bootfile=kernel.img

Note that this version does not support downloading via config
files (i.e. pxelinux config files or .INS config files) yet. This
will be added later.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
29d12216da pc-bios/s390-ccw: Link libnet into the netboot image and do the TFTP load
Most of the code has been taken from SLOF's netload.c file. Now we
can finally load an image via TFTP and execute the downloaded kernel.

Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-12-git-send-email-thuth@redhat.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
00dde1e60d pc-bios/s390-ccw: Add virtio-net driver code
The driver provides the recv() and send() functions which will
be required by SLOF's libnet code for receiving and sending
packets.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-11-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
3e4415a751 pc-bios/s390-ccw: Add core files for the network bootloading program
This is just a preparation for the next steps: Add a makefile and a
stripped down copy of pc-bios/s390-ccw/main.c as a basis for the network
bootloader program, linked against the libc from SLOF already (which we
will need for SLOF's libnet). The networking code is not included yet.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-10-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
4bfd755ff3 roms/SLOF: Update submodule to latest status
We need the latest fixes for building the libc and libnet
of SLOF for the s390-ccw network bootloader firmware.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-9-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
59efbff1cb pc-bios/s390-ccw: Add code for virtio feature negotiation
The upcoming virtio-net driver needs to negotiate some features,
so we need the possibility to do this in the core virtio code.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-8-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
ac7bec7cf9 pc-bios/s390-ccw: Remove unused structs from virtio.h
Looks like they have never been used, so let's simply remove them.

Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-7-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
824df3b84b pc-bios/s390-ccw: Move byteswap functions to a separate header
We'll need them in code that is not related to bootmap.h, so
they should reside in an independent header.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-6-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
3639f93f91 pc-bios/s390-ccw: Add a write() function for stdio
The stdio functions from the SLOF libc need a write() function for
printing text to stdout/stderr. Let's implement this function by
refactoring the code from sclp_print().

Acked-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-5-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
867e039a06 pc-bios/s390-ccw: Move virtio-block related functions into a separate file
The netboot code is going to link against the code from virtio.c, too, so
we've got to move the virtio-block and -scsi related code out of the way.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-4-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
af3bb73ab1 pc-bios/s390-ccw: Move ebc2asc to sclp.c
We will later need this array in a file that we will link to the
netboot code, too. Since there is some ebcdic conversion done
in sclp_get_loadparm_ascii(), the sclp.c file seems to be a good
candidate.

Acked-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-3-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Thomas Huth
90806fec99 pc-bios/s390-ccw: Move libc functions to separate header
The upcoming netboot code will use the libc from SLOF. To be able
to still use s390-ccw.h there, the libc related functions in this
header have to be moved to a different location.
And while we're at it, remove the duplicate memcpy() function from
sclp.c.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499863793-18627-2-git-send-email-thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Halil Pasic
b5f5a3afb6 s390x/css: use SubchDev.orb
Instead of passing around a pointer to ORB let us simplify some
function signatures by using the previously introduced ORB saved at the
subchannel (SubchDev).

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-Id: <20170711145441.33925-7-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Halil Pasic
e996583eb3 s390x/css: activate ChannelSubSys migration
Turn on migration for the channel subsystem for the next machine.  For
legacy machines we still have to do things the old way.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-Id: <20170711145441.33925-6-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Halil Pasic
ff443fe6b5 s390x/css: add ORB to SubchDev
Since we are going to need a migration compatibility breaking change to
activate ChannelSubSys migration let us use the opportunity to introduce
ORB to the SubchDev before that (otherwise we would need separate
handling e.g. a compat property).

The ORB will be useful for implementing IDA, or async handling of
subchannel work.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Guenther Hutzl <hutzl@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-Id: <20170711145441.33925-5-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:48 +02:00
Halil Pasic
457af62603 s390x/css: add missing css state conditionally
Although we have recently vmstatified the migration of some css
infrastructure, for some css entities there is still state left to be
migrated, because the focus was keeping migration stream
compatibility (that is basically everything as-is).

Let us add vmstate helpers and extend existing vmstate descriptions so
that we have everything we need. Let us guard the added state via
css_migration_enabled, so we keep the compatible behavior if css
migration is disabled.

Let's also annotate the bits which do not need to be migrated for better
readability.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170711145441.33925-4-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Halil Pasic
52629b3ba8 s390x: add css_migration_enabled to machine class
Currently the migration of the channel subsystem (css) is only partial
and is done by the virtio ccw proxies -- the only migratable css devices
existing at the moment.

With the current work on emulated and passthrough devices we need to
decouple the migration of the channel subsystem state from virtio ccw,
and have a separate section for it. A new section, however, necessarily
breaks the migration compatibility.

So let us introduce a switch at the machine class, and put it in 'off'
state for now. We will turn the switch 'on' for future machines once all
preparations are met. For compatibility machines the switch will stay
'off'.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-Id: <20170711145441.33925-3-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Halil Pasic
cec8bbf7d6 s390x: add helper get_machine_class
We will need the machine class at machine initialization time, so the
usual way via qdev won't do. Let's cache the machine class and also use
the default values of the base machine for capability discovery.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-Id: <20170711145441.33925-2-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Yi Min Zhao
25a08b8ded s390x/css: update css_adapter_interrupt
Let's use the new inject_airq callback of flic to inject adapter
interrupts. For kvm case, if the kernel flic doesn't support the new
interface, the irq routine remains unchanged. For non-kvm case,
qemu-flic handles the suppression process.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Fei Li <sherrylf@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Fei Li
2283f4d67a s390x/sic: realize SIC handling
Currently, we do nothing for the SIC instruction, but we need to
implement it properly. Let's add proper handling in the backend code.

Co-authored-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Fei Li <sherrylf@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Yi Min Zhao
1622ffd515 s390x/flic: introduce inject_airq callback
Let's introduce a specialized way to inject adapter interrupts that,
unlike the common interrupt injection method, allows us to take the
characteristics of the adapter into account.

For adapters subject to AIS facility:
- for non-kvm case, we handle the suppression for a given ISC in QEMU.
- for kvm case, we pass adapter id to kvm to do airq injection.

Also add tracepoints for suppressed airq and suppressing airq.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Fei Li <sherrylf@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Fei Li
6c1dd652a6 s390x/flic: introduce modify_ais_mode callback
In order to emulate the adapter interruption suppression (AIS)
facility properly, the guest needs to be able to modify the AIS mask.
Interrupt suppression will be handled via the flic (for kvm, via a
recently introduced kernel backend; for !kvm, in the flic code), so
let's introduce a method to change the mode via the flic interface.

We introduce the 'simm' and 'nimm' fields to QEMUS390FLICState
to store interruption modes for each ISC. Each bit in 'simm' and
'nimm' targets one ISC, and together they indicate three modes:
ALL-Interruptions, SINGLE-Interruption and NO-Interruptions. This
interface can initiate most transitions between the states; transition
from SINGLE-Interruption to NO-Interruptions via adapter interrupt
injection will be introduced in a following patch. The meaningful
combinations are as follows:

    interruption mode | simm bit | nimm bit
    ------------------|----------|----------
             ALL      |    0     |     0
           SINGLE     |    1     |     0
             NO       |    1     |     1

Co-authored-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Fei Li <sherrylf@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Fei Li
1497c16066 s390x: add flags field for registering I/O adapter
Introduce a new 'flags' field to IoAdapter to contain further
characteristics of the adapter, like whether the adapter is subject to
adapter-interruption suppression.

For the kvm case, pass this value in the 'flags' field when
registering an adapter.

Signed-off-by: Fei Li <sherrylf@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Jason J. Herne
c9ad8a7a53 s390x/cpumodel: provide compat handling for new cpu features
Provide a mechanism to disable features in compatibility machines.

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Jason J. Herne
ec3aadb13e s390x/cpumodel: clean up spacing and comments
Clean up spacing and add comments to clarify difference between base, full and
default models.

Not having spacing around the model definitions in gen-features.c is
particularly frustrating as the reader tends to misinterpret which model they
are looking at or editing.

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Claudio Imbrenda
f860d49753 s390x/migration: Monitor commands for storage attributes
Add an "info" monitor command to non-destructively inspect the state of
the storage attributes of the guest, and a normal command to toggle
migration mode (useful for debugging).

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Claudio Imbrenda
903fd80b03 s390x/migration: Storage attributes device
Storage attributes device, like we have for storage keys.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:47 +02:00
Christian Borntraeger
3272f0e22f linux-headers: update to 4.13-rc0
commit af3c8d98508d37541d4bf57f13a984a7f73a328c
    Merge tag 'drm-for-v4.13' of git://people.freedesktop.org/~airlied/linux

There is a change pending for v4.13-rc1 in linux-headers/linux/kvm.h
I will submit a fixup patch for 2.10 as soon as it hits the kernel.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 12:29:10 +02:00
Peter Maydell
7d367e7002 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Thu 13 Jul 2017 20:01:38 BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to fbc1b4a built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-14 11:17:36 +01:00
Fam Zheng
68c761e19c spapr_rng: Convert to DEFINE_PROP_LINK
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-21-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:43 +02:00
Fam Zheng
c7e002c55a cpu: Convert to DEFINE_PROP_LINK
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-20-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:43 +02:00
Fam Zheng
e4934bb39c mips_cmgcr: Convert to DEFINE_PROP_LINK
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-19-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:43 +02:00
Fam Zheng
e9cb190ad4 ivshmem: Convert to DEFINE_PROP_LINK
Unlike the usual object_property_add_link() invocations in other
devices, ivshmem checks the "is mapped" state of the backend in addition
to qdev_prop_allow_set_link_before_realize. To convert it without
specializing DEFINE_PROP_LINK which always uses the qdev callback, move
the extra check to device realize time.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-12-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:43 +02:00
Fam Zheng
2de7e26891 dimm: Convert to DEFINE_PROP_LINK
Unlike the usual object_property_add_link() invocations in other
devices, dimm checks the "is mapped" state of the backend in addition to
qdev_prop_allow_set_link_before_realize. To convert it without
specializing DEFINE_PROP_LINK which always uses the qdev general check
callback, move the extra check to device realize time.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-11-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:43 +02:00
Fam Zheng
aa8f057e74 virtio-crypto: Convert to DEFINE_PROP_LINK
Unlike other object_property_add_link() occurrences in virtio devices,
virtio-crypto checks the "in use" state of the linked backend object in
addition to qdev_prop_allow_set_link_before_realize. To convert it
without needing to specialize DEFINE_PROP_LINK which always uses the
qdev callback, move the "in use" check to device realize time.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-10-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:43 +02:00
Fam Zheng
d1fd7f775e virtio-rng: Convert to DEFINE_PROP_LINK
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-9-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Fam Zheng
08f1ecd873 virtio-scsi: Convert to DEFINE_PROP_LINK
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-8-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Fam Zheng
d679ac09f0 virtio-blk: Convert to DEFINE_PROP_LINK
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-7-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Fam Zheng
1b6b7d109e qdev: Add const qualifier to PropertyInfo definitions
The remaining non-const ones are in e1000e which modifies description at
runtime. They can be addressed separately.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-6-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Fam Zheng
75ab905383 qmp: Use ObjectProperty.type if present
The dynamic value is more informative in the case of link property,
otherwise it is the same.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-5-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Fam Zheng
5b4ff3c661 qdev: Introduce DEFINE_PROP_LINK
This property can be used to replace the object_property_add_link in
device code, to add a link to other objects, which is a common pattern.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-4-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Fam Zheng
faabdbb792 qdev: Introduce PropertyInfo.create
This allows property implementation to provide a specialized property
creation method.

Update conditions guarding property types accordingly.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-3-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Igor Mammedov
8f5d58ef2c qom: enforce readonly nature of link's check callback
link's check callback is supposed to verify/permit setting it,
however currently nothing restricts it from misusing it
and modifying target object from within.
Make sure that readonly semantics are checked by compiler
to prevent callback's misuse.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170714021509.23681-2-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Emilio G. Cota
d40d3da00c translate-all: remove redundant !tcg_enabled check in dump_exec_info
This check is redundant because it is already performed by the only
caller of dump_exec_info -- the caller was updated by b7da97eef
("monitor: Check whether TCG is enabled before running the "info jit"
code").

Checking twice wouldn't necessarily be too bad, but here the check also
returns with tb_lock held. So we can either do the check before tb_lock is
acquired, or just get rid of it. Given that it is redundant, I am going
for the latter option.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Emilio G. Cota
f940488fea vl: fix breakage of -tb-size
Commit e7b161d573 ("vl: add tcg_enabled() for tcg related code") adds
a check to exit the program when !tcg_enabled() while parsing the -tb-size
flag.

It turns out that when the -tb-size flag is evaluated, tcg_enabled() can
only return 0, since it is set (or not) much later by configure_accelerator().

Fix it by unconditionally exiting if the flag is passed to a QEMU binary
built with !CONFIG_TCG.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Eric Blake
081dd1fe36 nbd: Implement NBD_INFO_BLOCK_SIZE on client
The upstream NBD Protocol has defined a new extension to allow
the server to advertise block sizes to the client, as well as
a way for the client to inform the server whether it intends to
obey block sizes.

When using the block layer as the client, we will obey block
sizes; but when used as 'qemu-nbd -c' to hand off to the
kernel nbd module as the client, we are still waiting for the
kernel to implement a way for us to learn if it will honor
block sizes (perhaps by an addition to sysfs, rather than an
ioctl), as well as any way to tell the kernel what additional
block sizes to obey (NBD_SET_BLKSIZE appears to be accurate
for the minimum size, but preferred and maximum sizes would
probably be new ioctl()s), so until then, we need to make our
request for block sizes conditional.

When using ioctl(NBD_SET_BLKSIZE) to hand off to the kernel,
use the minimum block size as the sector size if it is larger
than 512, which also has the nice effect of cooperating with
(non-qemu) servers that don't do read-modify-write when
exposing a block device with 4k sectors; it might also allow
us to visit a file larger than 2T on a 32-bit kernel.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-10-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Eric Blake
0c1d50bda7 nbd: Implement NBD_INFO_BLOCK_SIZE on server
The upstream NBD Protocol has defined a new extension to allow
the server to advertise block sizes to the client, as well as
a way for the client to inform the server that it intends to
obey block sizes.

Thanks to a recent fix (commit df7b97ff), our real minimum
transfer size is always 1 (the block layer takes care of
read-modify-write on our behalf), but we're still more efficient
if we advertise 512 when the client supports it, as follows:
- OPT_INFO, but no NBD_INFO_BLOCK_SIZE: advertise 512, then
fail with NBD_REP_ERR_BLOCK_SIZE_REQD; client is free to try
something else since we don't disconnect
- OPT_INFO with NBD_INFO_BLOCK_SIZE: advertise 512
- OPT_GO, but no NBD_INFO_BLOCK_SIZE: advertise 1
- OPT_GO with NBD_INFO_BLOCK_SIZE: advertise 512

We can also advertise the optimum block size (presumably the
cluster size, when exporting a qcow2 file), and our absolute
maximum transfer size of 32M, to help newer clients avoid
EINVAL failures or abrupt disconnects on oversize requests.

We do not reject clients for using the older NBD_OPT_EXPORT_NAME;
we are no worse off for those clients than we used to be.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-9-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Eric Blake
8ecaeae822 nbd: Implement NBD_OPT_GO on client
NBD_OPT_EXPORT_NAME is lousy: per the NBD protocol, any failure
requires the server to close the connection rather than report an
error to us.  Therefore, upstream NBD recently added NBD_OPT_GO as
the improved version of the option that does what we want [1]: it
reports sane errors on failures, and on success provides at least
as much info as NBD_OPT_EXPORT_NAME.

[1] https://github.com/NetworkBlockDevice/nbd/blob/extension-info/doc/proto.md

This is a first cut at use of the information types.  Note that we
do not need to use NBD_OPT_INFO, and that use of NBD_OPT_GO means
we no longer have to use NBD_OPT_LIST to learn whether a server
requires TLS (this requires servers that gracefully handle unknown
NBD_OPT, many servers prior to qemu 2.5 were buggy, but I have patched
qemu, upstream nbd, and nbdkit in the meantime, in part because of
interoperability testing with this patch).  We still fall back to
NBD_OPT_LIST when NBD_OPT_GO is not supported on the server, as it
is still one last chance for a nicer error message.  Later patches
will use further info, like NBD_INFO_BLOCK_SIZE.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-8-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Eric Blake
f37708f6b8 nbd: Implement NBD_OPT_GO on server
NBD_OPT_EXPORT_NAME is lousy: per the NBD protocol, any failure
requires us to close the connection rather than report an error.
Therefore, upstream NBD recently added NBD_OPT_GO as the improved
version of the option that does what we want [1], along with
NBD_OPT_INFO that returns the same information but does not
transition to transmission phase.

[1] https://github.com/NetworkBlockDevice/nbd/blob/extension-info/doc/proto.md

This is a first cut at the information types, and only passes the
same information already available through NBD_OPT_LIST and
NBD_OPT_EXPORT_NAME; items like NBD_INFO_BLOCK_SIZE (and thus any
use of NBD_REP_ERR_BLOCK_SIZE_REQD) are intentionally left for
later patches.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-7-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Eric Blake
23e099c34c nbd: Refactor reply to NBD_OPT_EXPORT_NAME
Reply directly in nbd_negotiate_handle_export_name(), rather than
waiting until nbd_negotiate_options() completes.  This will make it
easier to implement NBD_OPT_GO.  Pass additional parameters around,
rather than stashing things inside NBDClient.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-6-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Eric Blake
621c4f4eab nbd: Simplify trace of client flags in negotiation
Simplify the tracing of client flags in the server, and return -EINVAL
instead of -EIO if we successfully read but don't like those flags.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-5-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:42 +02:00
Eric Blake
3736cc5be3 nbd: Expose and debug more NBD constants
The NBD protocol has several constants defined in various extensions
that we are about to implement.  Expose them to the code, along with
an easy way to map various constants to strings during diagnostic
messages.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-4-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Eric Blake
37ec36f622 nbd: Don't bother tracing an NBD_OPT_ABORT response failure
We really don't care if our spec-compliant reply to NBD_OPT_ABORT
was received, so shave off some lines of code by not even tracing it.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-3-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Eric Blake
004a89fce9 nbd: Create struct for tracking export info
The NBD Protocol is introducing some additional information
about exports, such as minimum request size and alignment, as
well as an advertised maximum request size.  It will be easier
to feed this information back to the block layer if we gather
all the information into a struct, rather than adding yet more
pointer parameters during negotiation.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-2-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Alexey Kardashevskiy
1221a47467 memory/iommu: introduce IOMMUMemoryRegionClass
This finishes QOM'fication of IOMMUMemoryRegion by introducing
a IOMMUMemoryRegionClass. This also provides a fastpath analog for
IOMMU_MEMORY_REGION_GET_CLASS().

This makes IOMMUMemoryRegion an abstract class.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170711035620.4232-3-aik@ozlabs.ru>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Alexey Kardashevskiy
3df9d74806 memory/iommu: QOM'fy IOMMU MemoryRegion
This defines new QOM object - IOMMUMemoryRegion - with MemoryRegion
as a parent.

This moves IOMMU-related fields from MR to IOMMU MR. However to avoid
dynamic QOM casting in fast path (address_space_translate, etc),
this adds an @is_iommu boolean flag to MR and provides new helper to
do simple cast to IOMMU MR - memory_region_get_iommu. The flag
is set in the instance init callback. This defines
memory_region_is_iommu as memory_region_get_iommu()!=NULL.

This switches MemoryRegion to IOMMUMemoryRegion in most places except
the ones where MemoryRegion may be an alias.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20170711035620.4232-2-aik@ozlabs.ru>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Peng Hao
98fab4c163 chardev: fix parallel device can't be reconnect
The parallel device doesn't register a be->chr_can_read function, but the
remote disconnect event is handled in chr_read. So a connected parallel
device cannot detect a remote disconnect event. Other chardevs with
chr_can_read=NULL have the same problem.

Signed-off-by: Peng Hao <peng.hao2@zte.com.cn>
Reviewed-by: Wang Yechao <wang.yechao255@zte.com.cn>
Reviewed-by: Jiang Biao <jiang.biao2@zte.com.cn>
Message-Id: <1499874119-67558-1-git-send-email-peng.hao2@zte.com.cn>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Alex Bennée
5a6a1ad181 gdbstub: don't fail on vCont; C04:0; c packets
The thread-id of 0 means any CPU but we then ignore the fact we find
the first_cpu in this case who can have an index of 0. Instead of
bailing out just test if we have managed to match up thread-id to a
CPU.

Otherwise you get:
  gdb_handle_packet: command='vCont;C04:0;c'
  put_packet: reply='E22'

The actual reason for gdb sending vCont;C04:0;c was fixed in a
previous commit where we ensure the first_cpu's tid is correctly
reported to gdb however we should still behave correctly next time it
does send 0.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

Message-Id: <20170712105216.747-5-alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Alex Bennée
bd88c780e6 qom/cpu: remove host_tid field
This was only used by the gdbstub and even then was only being set for
subsequent threads. Rather than continue duplicating the number, just
make the gdbstub get the information from TaskState structure.

Now the tid is correctly reported for all threads the bug I was seeing
with "vCont;C04:0;c" packets is fixed as the correct tid is reported
to gdb.

I moved cpu_gdb_index into the gdbstub to facilitate easy access to
the TaskState which is used elsewhere in gdbstub.

To prevent BSD failing to build I've included ts_tid into its
TaskStruct but not populated it - which was the same state as the old
cpu->host_tid. I'll leave it up to the BSD maintainers to actually
populate this properly if they want a working gdbstub with
user-threads.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>

Message-Id: <20170712105216.747-4-alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Alex Bennée
d2a6c8570b gdbstub: rename cpu_index -> cpu_gdb_index
This is to make it clear the index is purely a gdbstub function and
should not be confused with the value of cpu->cpu_index. At the same
time we move the function from the header to gdbstub itself which will
help with later changes.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

Message-Id: <20170712105216.747-3-alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Alex Bennée
118e226884 gdbstub: modernise DEBUG_GDB
Convert to a gdb_debug helper which compiles away to nothing when not
used but still ensures the format strings are checked. There is some
minor code motion for the incorrect checksum message to report it
before we attempt to send the reply.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Greg Kurz <groug@kaod.org>
Message-Id: <20170712105216.747-2-alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:41 +02:00
Pranith Kumar
cb58a6d361 mttcg/i386: Patch instruction using async_safe_* framework
In mttcg, calling pause_all_vcpus() during execution from the
generated TBs causes a deadlock if some vCPU is waiting for exclusive
execution in start_exclusive(). Fix this by using the async_safe_*
framework instead of pausing vcpus for patching instructions.

CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170712215143.19594-2-bobby.prani@gmail.com>
[Get rid completely of the TCG-specific code. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 12:04:35 +02:00
Pranith Kumar
406bc339b0 Revert "exec.c: Fix breakpoint invalidation race"
Now that we have proper locking after MTTCG patches have landed, we
can revert the commit.  This reverts commit

a9353fe897.

CC: Peter Maydell <peter.maydell@linaro.org>
CC: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170712215143.19594-1-bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:05:19 +02:00
Prasad J Pandit
04bf2526ce exec: use qemu_ram_ptr_length to access guest ram
When accessing guest's ram block during DMA operation, use
'qemu_ram_ptr_length' to get the ram block pointer. It ensures
that a DMA operation of the given length is possible, and avoids
any OOB memory access situations.

Reported-by: Alex <broscutamaker@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <20170712123840.29328-1-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
1a29cc8f5e serial: chardev hotswap support
This allows to change the port's backend runtime, e.g. change it from
file to a socket making it possible to establish a debug session with
WinDbg

> qemu-system [..] -chardev file,id=charchannel2,path=/tmp/charchannel2 \
  -device isa-serial,chardev=charchannel2,id=channel2

QEMU 2.9.50 monitor - type 'help' for more information
(qemu) chardev-change charchannel2 \
  socket,host=127.0.0.1,port=4242,server,nowait

For a backend change, a number of ioctls have to be replayed to sync
the current setup of a frontend to a backend tty. This is hopefully
enough so we don't have to track, store and replay the whole original
control byte sequence.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <1499342940-56739-14-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
757358425e serial: move TIOCM update to a separate function
will be used by the following patch

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-13-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
af50855c25 virtio-console: chardev hotswap support
In case of a backend change, the handler functions and the watch have
to be reset.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <1499342940-56739-12-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
75b601602b hmp: add hmp analogue for qmp-chardev-change
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1499342940-56739-11-git-send-email-anton.nefedov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
7b5a9e453e test-char: add hotswap test
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-10-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
ae69e4823d test-char: split char_file_test
makes it possible to test the existing chardev-file

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-9-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
92ddfade9f test-char: split char_udp_test
makes it possible to test the existing chardev-udp

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-8-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:34 +02:00
Anton Nefedov
12f043c240 test-char: destroy chardev-udp after test
this is only not a problem if the test is last in a suite,
otherwise it makes the following main_loop() calls to fail

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-7-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Anton Nefedov
3065070153 char: avoid chardevice direct access
frontends should avoid accessing CharDriver struct where possible

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-6-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Anton Nefedov
7c44a2a9d1 char: forbid direct chardevice access for hotswap devices
qemu_chr_fe_get_driver() is unsafe, frontends with hotswap support
should not access CharDriver ptr directly as CharDriver might change.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-5-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Anton Nefedov
7bb86085e6 char: chardevice hotswap
This patch adds a possibility to change a char device without a frontend
removal.

Ideally, it would have to happen transparently to a frontend, i.e.
frontend would continue its regular operation.
However, backends are not stateless and are set up by the frontends
via qemu_chr_fe_<> functions, and it's not (generally) possible to replay
that setup entirely in a backend code, as different chardevs respond
to the setup calls differently, so do frontends work differently basing
on those setup responses.
Moreover, some frontend can generally get and save the backend pointer
(qemu_chr_fe_get_driver()), and it will become invalid after backend change.

So, a frontend which would like to support chardev hotswap has to register
a "backend change" handler, and redo its backend setup there.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <1499342940-56739-4-git-send-email-anton.nefedov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Anton Nefedov
81517ba37a char: add backend hotswap handler
Frontends should have an interface to setup the handler of a backend change.
The interface will be used in the next commits

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1499342940-56739-3-git-send-email-anton.nefedov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Anton Nefedov
313e45b5fe char: move QemuOpts->ChardevBackend translation to a separate func
parse function will be used by the following patch

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Message-Id: <1499342940-56739-2-git-send-email-anton.nefedov@virtuozzo.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Marc-André Lureau
bcdeb9be56 chardev: block during sync read
A sync read should block until all requested data is
available (instead of retrying in qemu_chr_fe_read_all). Change the
channel to blocking during sync_read.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170706170353.32601-1-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-07-14 11:04:33 +02:00
Philippe Mathieu-Daudé
747969db2a MAINTAINERS: add entry for "Unimplemented" device
Also volunteer myself as reviewer.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170629150308.22766-6-f4bug@amsat.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Philippe Mathieu-Daudé
752a2ae273 MAINTAINERS: update TCI entry
moved in 244f1441 to tcg/

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170629150308.22766-5-f4bug@amsat.org>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Philippe Mathieu-Daudé
2c7ccb4e27 MAINTAINERS: update Xen entries
moved in 56e2cd24..28b99f47 to hw/xen/ and hw/i386/xen/

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Message-Id: <20170629150308.22766-4-f4bug@amsat.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Philippe Mathieu-Daudé
1962cb2001 MAINTAINERS: update KVM entries
moved in 92229a57 to accel/

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170629150308.22766-3-f4bug@amsat.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Philippe Mathieu-Daudé
c6a88ddaac MAINTAINERS: update TCG entries
moved in a9ded601..244f1441 to accel/

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170629150308.22766-2-f4bug@amsat.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Paolo Bonzini
b98fcfd884 build: add -Wexpansion-to-defined
This warning is included in -Wall by clang, but not by GCC (which only
enables it for -Wextra).  Include it in the list of warnings we enable
to minimize the differences between the compilers:

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-14 11:04:33 +02:00
Peter Maydell
a309b290aa Merge remote-tracking branch 'remotes/armbru/tags/pull-error-2017-07-13' into staging
Error reporting patches for 2017-07-13

# gpg: Signature made Thu 13 Jul 2017 12:55:45 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-error-2017-07-13:
  Convert error_report*_err() to warn_report*_err()
  error: Implement the warn and free Error functions
  char-socket: Report TCP socket waiting as information
  Convert error_report() to warn_report()
  error: Functions to report warnings and informational messages
  util/qemu-error: Rename error_print_loc() to be more generic
  websock: Don't try to set *errp directly
  block: Don't try to set *errp directly
  xilinx: Fix latent error handling bug

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-14 09:36:40 +01:00
Janosch Frank
03f47ee49e s390x/kvm: Rework cmma management
Let's keep track of cmma enablement and move the mem_path check into
the actual enablement. This now also warns users that do not use
cpu-models about disabled cmma when using huge pages.

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-14 09:11:12 +02:00
Mark Cave-Ayland
abcea034a0 Update OpenBIOS images to fbc1b4a built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-07-13 19:44:45 +01:00
Peter Maydell
49bcce4b9c Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-07-12' into staging
QAPI patches for 2017-07-12

# gpg: Signature made Wed 12 Jul 2017 17:07:20 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-07-12:
  scripts: use build_ prefix for string not piped through cgen()
  qobject: Update coccinelle script to catch Q{INC, DEC}REF
  qobject: Catch another straggler for use of qdict_put_str()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-13 16:56:06 +01:00
Peter Maydell
aa5a704756 Merge remote-tracking branch 'remotes/mjt/tags/trivial-patches-fetch' into staging
trivial patches for 2017-07-12

# gpg: Signature made Wed 12 Jul 2017 14:58:43 BST
# gpg:                using RSA key 0x701B4F6B1A693E59
# gpg: Good signature from "Michael Tokarev <mjt@tls.msk.ru>"
# gpg:                 aka "Michael Tokarev <mjt@corpit.ru>"
# gpg:                 aka "Michael Tokarev <mjt@debian.org>"
# Primary key fingerprint: 6EE1 95D1 886E 8FFB 810D  4324 457C E0A0 8044 65C5
#      Subkey fingerprint: 7B73 BAD6 8BE7 A2C2 8931  4B22 701B 4F6B 1A69 3E59

* remotes/mjt/tags/trivial-patches-fetch:
  include/hw/ptimer.h: Add documentation comments
  hxtool: remove dead -q option
  qga-win32: Fix memory leak of device information set
  hw/core: fix missing return value in load_image_targphys_as()
  elf-loader: warn about invalid endianness
  configure: Handle having no c++ compiler in FORTIFY_SOURCE check
  hw/pci: define msi_nonbroken in pci-stub
  hw/misc: add missing includes
  configure: Fix build with pkg-config and --static --enable-sdl
  util/qemu-sockets: Drop unused helper socket_address_to_string()
  target/xtensa: gdbstub: drop dead return statement

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-13 16:15:52 +01:00
Peter Maydell
76fba746ea Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2017-07-11' into staging
Block layer patches

# gpg: Signature made Tue 11 Jul 2017 17:05:56 BST
# gpg:                using RSA key 0xF407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2017-07-11: (85 commits)
  iotests: Add preallocated growth test for qcow2
  iotests: Add preallocated resize test for raw
  block/qcow2: falloc/full preallocating growth
  block/qcow2: Rename "fail_block" to just "fail"
  block/qcow2: Add qcow2_refcount_area()
  block/qcow2: Metadata preallocation for truncate
  block/qcow2: Lock s->lock in preallocate()
  block/qcow2: Generalize preallocate()
  block/file-posix: Preallocation for truncate
  block/file-posix: Generalize raw_regular_truncate
  block/file-posix: Extract raw_regular_truncate()
  block/file-posix: Small fixes in raw_create()
  qemu-img: Expose PreallocMode for resizing
  block: Add PreallocMode to blk_truncate()
  block: Add PreallocMode to bdrv_truncate()
  block: Add PreallocMode to BD.bdrv_truncate()
  iotests: add test 178 for qemu-img measure
  qemu-iotests: support per-format golden output files
  qemu-img: add measure subcommand
  qcow2: add bdrv_measure() support
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-13 13:38:57 +01:00
Alistair Francis
88f83f3539 Convert error_report*_err() to warn_report*_err()
Convert all uses of error_report*_err("Warning:"... to use
warn_report*_err() instead. This helps standardise on a single
method of printing warnings to the user.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <d8e088757186955f40f04ec4f4be7f640d3c8660.1499866456.git.alistair.francis@xilinx.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:50:24 +02:00
Alistair Francis
e43ead1d0b error: Implement the warn and free Error functions
Implement warn_report_err() and warn_reportf_err() functions which
are the same as the error_report_err() and error_reportf_err()
functions except report a warning instead of an error.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <276ff93eadc0b01b8243cc61ffc331f77922c0d0.1499866456.git.alistair.francis@xilinx.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:50:19 +02:00
Alistair Francis
c51c4f8807 char-socket: Report TCP socket waiting as information
When QEMU is waiting for a TCP socket connection it reports that message as
an error. This isn't an error; it is just information, so let's change the
report to use info_report() instead.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <b3f3601c52843afca9a9b12c7a4fefd68e60de32.1499866456.git.alistair.francis@xilinx.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:50:09 +02:00
Alistair Francis
3dc6f86936 Convert error_report() to warn_report()
Convert all uses of error_report("warning:"... to use warn_report()
instead. This helps standardise on a single method of printing warnings
to the user.

All of the warnings were changed using this command:
    find ./* -type f -exec sed -i \
      's|error_report(".*warning[,:] |warn_report("|Ig' {} +

Indentation fixed up manually afterwards.

The test-qdev-global-props test case was manually updated to ensure that
this patch passes make check (as the test cases are case sensitive).

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Suggested-by: Thomas Huth <thuth@redhat.com>
Cc: Jeff Cody <jcody@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Lieven <pl@kamp.de>
Cc: Josh Durgin <jdurgin@redhat.com>
Cc: "Richard W.M. Jones" <rjones@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Greg Kurz <groug@kaod.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Peter Chubb <peter.chubb@nicta.com.au>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Alexander Graf <agraf@suse.de>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Greg Kurz <groug@kaod.org>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Peter Chubb <peter.chubb@data61.csiro.au>
Acked-by: Max Reitz <mreitz@redhat.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Message-Id: <e1cfa2cd47087c248dd24caca9c33d9af0c499b0.1499866456.git.alistair.francis@xilinx.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:49:58 +02:00
Alistair Francis
97f40301f1 error: Functions to report warnings and informational messages
Add warn_report(), warn_vreport() for reporting warnings, and
info_report(), info_vreport() for informational messages.

These are implemented with a helper function factored out of
error_vreport(), suitably generalized. This patch makes no changes
to the output of the original error_report() function.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <c89e9980019f296ec9aa38d7689ac4d5c369296d.1499866456.git.alistair.francis@xilinx.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:49:54 +02:00
Peter Maydell
f0d2ead97c Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170711' into staging
MIPS patches 2017-07-11

Changes:
* Fix MSA copy_[s|u]_df corner case of rd = 0
* Update malta to load the initrd at the end of the low memory

# gpg: Signature made Tue 11 Jul 2017 15:42:20 BST
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170711:
  mips/malta: load the initrd at the end of the low memory
  target/mips: fix msa copy_[s|u]_df rd = 0 corner case

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-13 12:48:37 +01:00
Alistair Francis
beeb175c0d util/qemu-error: Rename error_print_loc() to be more generic
Rename the error_print_loc() function in preparation for using it to
print warnings as well.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <661b215695db878a0aef8401b506fb3da50e981a.1499866456.git.alistair.francis@xilinx.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:46:53 +02:00
Eduardo Habkost
e79ea67a97 websock: Don't try to set *errp directly
Assigning directly to *errp is not valid, as errp may be NULL,
&error_fatal, or &error_abort.  Use error_propagate() instead.

Cc: "Daniel P. Berrange" <berrange@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170608133906.12737-4-ehabkost@redhat.com>
Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:45:53 +02:00
Eduardo Habkost
57ef3f1278 block: Don't try to set *errp directly
Assigning directly to *errp is not valid, as errp may be NULL,
&error_fatal, or &error_abort.  Use error_propagate() instead.

With this, there's no need to check if errp is NULL anymore, as
error_propagate() and error_prepend() are able to handle that.

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170608133906.12737-3-ehabkost@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:45:53 +02:00
Eduardo Habkost
a9859c90a5 xilinx: Fix latent error handling bug
Assigning directly to *errp is not valid, as errp may be null,
&error_fatal, or &error_abort.  The !*errp conditional protects
against the latter two, but we then leak @local_err.  Fortunately,
the qdev core always passes pointer to null, so this is "merely" a
latent bug.

Use error_propagate() instead.

Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: qemu-arm@nongnu.org
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170608133906.12737-2-ehabkost@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Commit message clarified]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-13 13:45:53 +02:00
Peter Maydell
6e2c463343 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170711' into staging
target-arm queue:
 * v7M: ignore writes to CONTROL.SPSEL from Thread mode
 * KVM: Enable in-kernel timers with user space gic
 * aspeed: Register all watchdogs
 * hw/misc: Add Exynos4210 Pseudo Random Number Generator

# gpg: Signature made Tue 11 Jul 2017 11:28:15 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170711:
  target-arm: v7M: ignore writes to CONTROL.SPSEL from Thread mode
  ARM: KVM: Enable in-kernel timers with user space gic
  aspeed: Register all watchdogs
  hw/misc: Add Exynos4210 Pseudo Random Number Generator

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-13 10:47:10 +01:00
Marc-André Lureau
086ee7a620 scripts: use build_ prefix for string not piped through cgen()
The gen_ prefix is awkward.  Generated C should go through cgen()
exactly once (see commit 1f9a7a1).  The common way to get this wrong is
passing a foo=gen_foo() keyword argument to mcgen().  I'd like us to
adopt a naming convention where gen_ means "something that's been piped
through cgen(), and thus must not be passed to cgen() or mcgen()".
Requires renaming gen_params(), gen_marshal_proto() and
gen_event_send_proto().

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170601124143.10915-1-marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-12 17:51:54 +02:00
Eric Blake
8a4613a0ab qobject: Update coccinelle script to catch Q{INC, DEC}REF
The recent commit b097efc0 used qobject_decref(QOBJECT(E)), even
though we already have QDECREF(E) for that purpose.  We can update
our coccinelle script to catch any future relapses; with that in
place, the rest of the patch is generated with:
 spatch --sp-file scripts/coccinelle/qobject.cocci \
        --macro-file scripts/cocci-macro-file.h --dir . --in-place

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170624181008.25497-3-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-12 17:51:54 +02:00
Eric Blake
187f47e947 qobject: Catch another straggler for use of qdict_put_str()
Dan's addition of key-secret improvements in commit 29cf9336 was
developed prior to the addition of QDict scalar insertion macros,
but merged after the general cleanup in commit 46f5ac20.
Patch created mechanically by rerunning:
  spatch --sp-file scripts/coccinelle/qobject.cocci \
         --macro-file scripts/cocci-macro-file.h --dir . --in-place

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-Id: <20170624181008.25497-2-eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-12 17:51:54 +02:00
Peter Maydell
31fe1c4145 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Tue 11 Jul 2017 09:35:26 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  backends: remove empty trace-events file
  trace: Fix early setting of events with the "vcpu" property

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-11 17:13:49 +01:00
Max Reitz
ced1484322 iotests: Add preallocated growth test for qcow2
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-17-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
a2c7e08212 iotests: Add preallocated resize test for raw
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-16-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
772d1f973f block/qcow2: falloc/full preallocating growth
Implement the preallocation modes falloc and full for growing qcow2
images.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613202107.10125-15-mreitz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
60c48a29b7 block/qcow2: Rename "fail_block" to just "fail"
Now alloc_refcount_block() only contains a single fail label, so it
makes more sense to just name it "fail" instead of "fail_block".

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-14-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
12cc30a8cb block/qcow2: Add qcow2_refcount_area()
This function creates a collection of self-describing refcount
structures (including a new refcount table) at the end of a qcow2 image
file. Optionally, these structures can also describe a number of
additional clusters beyond themselves; this will be important for
preallocated truncation, which will place the data clusters and L2
tables there.

For now, we can use this function to replace the part of
alloc_refcount_block() that grows the refcount table (from which it is
actually derived).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-13-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
95b98f343b block/qcow2: Metadata preallocation for truncate
We can support PREALLOC_MODE_METADATA by invoking preallocate() in
qcow2_truncate().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613202107.10125-12-mreitz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
652fecd005 block/qcow2: Lock s->lock in preallocate()
preallocate() is and will be called only from places that do not
otherwise need to lock s->lock: Currently that is qcow2_create2(), as of
a future patch it will be called from qcow2_truncate(), too.

It therefore makes sense to move locking that mutex into preallocate()
itself.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-11-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
7bc45dc172 block/qcow2: Generalize preallocate()
This patch adds two new parameters to the preallocate() function so we
will be able to use it not just for preallocating a new image but also
for preallocated image growth.

The offset parameter allows the caller to specify a virtual offset from
which to start preallocating. For newly created images this is always 0,
but for preallocating growth this will be the old image length.

The new_length parameter specifies the supposed new length of the image
(basically the "end offset" for preallocation). During image truncation,
bdrv_getlength() will return the old image length so we cannot rely on
its return value then.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-10-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:02 +02:00
Max Reitz
35d72602ec block/file-posix: Preallocation for truncate
By using raw_regular_truncate() in raw_truncate(), we can now easily
support preallocation.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-9-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Max Reitz
d0bc9e5d5e block/file-posix: Generalize raw_regular_truncate
Currently, raw_regular_truncate() is intended for setting the size of a
newly created file. However, we also want to use it for truncating an
existing file in which case only the newly added space (when growing)
should be preallocated.

This also means that if resizing failed, we should try to restore the
original file size. This is important when using preallocation.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-8-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Max Reitz
9f63b07ee7 block/file-posix: Extract raw_regular_truncate()
This functionality is part of raw_create() which we will be able to
reuse nicely in raw_truncate().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170613202107.10125-7-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Max Reitz
7dacd8bd3d block/file-posix: Small fixes in raw_create()
Variables should be declared at the start of a block, and if a certain
parameter value is not supported it may be better to return -ENOTSUP
instead of -EINVAL.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170613202107.10125-6-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Max Reitz
dc5f690b97 qemu-img: Expose PreallocMode for resizing
Add a --preallocation command line option to qemu-img resize which can
be used to set the PreallocMode parameter of blk_truncate().

While touching this code, fix the fact that we did not handle errors
returned by blk_getlength().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613202107.10125-5-mreitz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Max Reitz
3a691c50f1 block: Add PreallocMode to blk_truncate()
blk_truncate() itself will pass that value to bdrv_truncate(), and all
callers of blk_truncate() just set the parameter to PREALLOC_MODE_OFF
for now.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-4-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Max Reitz
7ea37c3066 block: Add PreallocMode to bdrv_truncate()
For block drivers that just pass a truncate request to the underlying
protocol, we can now pass the preallocation mode instead of aborting if
it is not PREALLOC_MODE_OFF.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-3-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Max Reitz
8243ccb743 block: Add PreallocMode to BD.bdrv_truncate()
Add a PreallocMode parameter to the bdrv_truncate() function implemented
by each block driver. Currently, we always pass PREALLOC_MODE_OFF and no
driver accepts anything else.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170613202107.10125-2-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Stefan Hajnoczi
32a1681adc iotests: add test 178 for qemu-img measure
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-10-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Stefan Hajnoczi
217a0683b7 qemu-iotests: support per-format golden output files
Some tests produce format-dependent output.  Either the difference is
filtered out and ignored, or the test case is format-specific so we
don't need to worry about per-format output differences.

There is a third case: the test script is the same for all image formats
and the format-dependent output is relevant.  An ugly workaround is to
copy-paste the test into multiple per-format test cases.  This
duplicates code and is not maintainable.

This patch allows test cases to add per-format golden output files so a
single test case can work correctly when format-dependent output must be
checked:

  123.out.qcow2
  123.out.raw
  123.out.vmdk
  ...

This naming scheme is not composable with 123.out.nocache or 123.pc.out,
two other scenarios where output files are split.  I don't think it
matters since few test cases need these features.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-9-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Stefan Hajnoczi
fd03c2b8fb qemu-img: add measure subcommand
The measure subcommand calculates the size required by a new image file.
This can be used by users or management tools that need to allocate
space on an LVM volume, SAN LUN, etc before creating or converting an
image file.

Suggested-by: Maor Lipchuk <mlipchuk@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-8-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:01 +02:00
Stefan Hajnoczi
c501c35220 qcow2: add bdrv_measure() support
Use qcow2_calc_prealloc_size() to get the required file size.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-7-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Stefan Hajnoczi
0eb4a8c1df qcow2: extract image creation option parsing
The image creation options parsed by qcow2_create() are also needed to
implement .bdrv_measure().  Extract the parsing code, including input
validation.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-6-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Stefan Hajnoczi
7c5bcc4212 qcow2: make refcount size calculation conservative
The refcount metadata size calculation is inaccurate and can produce
numbers that are too small.  This is bad because we should calculate a
conservative number - one that is guaranteed to be large enough.

This patch switches the approach to a fixed point calculation because
the existing equation is hard to solve when inaccuracies are taken care
of.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-5-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Stefan Hajnoczi
95c67e3bd7 qcow2: extract preallocation calculation function
Calculating the preallocated image size will be needed to implement
.bdrv_measure().  Extract the code out into a separate function.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-4-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Stefan Hajnoczi
a843a22a82 raw-format: add bdrv_measure() support
Maximum size calculation is trivial for the raw format: it's just the
requested image size (because there is no metadata).

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-3-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Stefan Hajnoczi
90880ff107 block: add bdrv_measure() API
bdrv_measure() provides a conservative maximum for the size of a new
image.  This information is handy if storage needs to be allocated (e.g.
a SAN or an LVM volume) ahead of time.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170705125738.8777-2-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Eric Blake
b43671f80c tests: Avoid non-portable 'echo -ARG'
POSIX says that backslashes in the arguments to 'echo', as well as
any use of 'echo -n' and 'echo -e', are non-portable; it recommends
people should favor 'printf' instead.  This is definitely true where
we do not control which shell is running (such as in makefile snippets
or in documentation examples).  But even for scripts where we
require bash (and therefore, where echo does what we want by default),
it is still possible to use 'shopt -s xpg_echo' to change bash's
behavior of echo.  And setting a good example never hurts when we are
not sure if a snippet will be copied from a bash-only script to a
general shell script (although I don't change the use of non-portable
\e for ESC when we know the running shell is bash).

Replace 'echo -n "..."' with 'printf %s "..."', and 'echo -e "..."'
with 'printf %b "...\n"', with the optimization that the %s/%b
argument can be omitted if the string being printed is a strict
literal with no '%', '$', or '`' (we could technically also make
this optimization when there are $ or `` substitutions but where
we can prove their results will not be problematic, but proving
that such substitutions are safe makes the patch less trivial
compared to just being consistent).

In the qemu-iotests check script, fix unusual shell quoting
that would result in word-splitting if 'date' outputs a space.

In test 051, take an opportunity to shorten the line.

In test 068, get rid of a pointless second invocation of bash.

CC: qemu-trivial@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 20170703180950.9895-1-eblake@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Max Reitz
6f55dfa4a4 iotests: Add test for colon handling
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170702150510.23276-3-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Max Reitz
3190683ea3 iotests: Use absolute paths for executables
A user may specify a relative path for accessing qemu, qemu-img, etc.
through environment variables ($QEMU_PROG and friends) or a symlink.

If a test decides to change its working directory, relative paths will
cease to work, however. Work around this by making all of the paths to
programs that should undergo testing absolute. Besides "realpath", we
also have to use "type -p" to support programs in $PATH.

As a side effect, this fixes specifying these programs as symlinks for
out-of-tree builds: Before, you would have to create two symlinks, one
in the build and one in the source tree (the first one for common.config
to find, the second one for the iotest to use). Now it is sufficient to
create one in the build tree because common.config will resolve it.

Reported-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170702150510.23276-2-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Daniel P. Berrange
ae50b71db0 iotests: chown LUKS device before qemu-io launches
On some distros, whenever you close a block device file
descriptor there is a udev rule that resets the file
permissions. This can race with the test script when
we run qemu-io multiple times against the same block
device. Occasionally the second qemu-io invocation
will find udev has reset the permissions causing failure.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170626123510.20134-6-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Daniel P. Berrange
a488e71e1e iotests: add more LUKS hash combination tests
Add tests for sha224, sha512, sha384 and ripemd160 hash
algorithms.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170626123510.20134-5-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Daniel P. Berrange
307d999198 iotests: reduce PBKDF iterations when testing LUKS
By default the PBKDF algorithm used with LUKS is tuned
based on the number of iterations to produce 1 second
of running time. This makes running the I/O test with
the LUKS format orders of magnitude slower than with
qcow2/raw formats.

When creating LUKS images, set the iteration time to
10ms to reduce the time overhead for LUKS, since
security does not matter in I/O tests.

Previously a full 'check -luks' would take

  $ time ./check -luks
  Passed all 22 tests

  real  23m9.988s
  user  21m46.223s
  sys   0m22.841s

Now it takes

  $ time ./check -luks
  Passed all 22 tests

  real  4m39.235s
  user  3m29.590s
  sys   0m24.234s

Still slow compared to qcow2/raw, but much improved
nonetheless.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170626123510.20134-4-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:45:00 +02:00
Daniel P. Berrange
13a1d4a71b iotests: fix remaining tests to work with LUKS
The tests 033, 140, 145 and 157 were all broken
when run with LUKS, since they did not correctly use
the required image opts args syntax to specify the
decryption secret. Further, the 120 test simply does
not make sense to run with luks, as the scenario
exercised is not relevant.

The test 181 was broken when run with LUKS because
it didn't take account of the fact that $TEST_IMG was
already in image opts syntax. The launch_qemu
helper also didn't register the secret object
providing the LUKS password.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170626123510.20134-3-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Daniel P. Berrange
2c6f600642 iotests: skip 159 & 170 with luks format
While the qemu-img dd command does accept --image-opts
this is not sufficient to make it work with the LUKS
image yet. This is because bdrv_create() still always
requires the non-image-opts syntax.

Thus we must skip 159/170 with luks for now.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170626123510.20134-2-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
615b5dcf2d block: release persistent bitmaps on inactivate
We should release them here to reload on invalidate cache.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-31-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
5c36c1af27 qmp: block-dirty-bitmap-remove: remove persistent
Remove persistent bitmap from the storage on block-dirty-bitmap-remove.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-30-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
469c71edc7 qcow2: add .bdrv_remove_persistent_dirty_bitmap
Realize .bdrv_remove_persistent_dirty_bitmap interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-29-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
56f364e6d7 block/dirty-bitmap: add bdrv_remove_persistent_dirty_bitmap
Interface for removing persistent bitmap from its storage.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-28-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
fc905d3a0c iotests: test qcow2 persistent dirty bitmap
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-27-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
a3b52535e8 qmp: add x-debug-block-dirty-bitmap-sha256
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170628120530.31251-26-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
eb738bb50f qmp: add autoload parameter to block-dirty-bitmap-add
Optional. Default is false.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-25-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
fd5ae4ccbe qmp: add persistent flag to block-dirty-bitmap-add
Add optional 'persistent' flag to qmp command block-dirty-bitmap-add.
Default is false.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-24-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
da0eb242ad qcow2: add .bdrv_can_store_new_dirty_bitmap
Realize .bdrv_can_store_new_dirty_bitmap interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-23-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
67b792f5ed block: add bdrv_can_store_new_dirty_bitmap
This will be needed to check some restrictions before making a bitmap
persistent in qmp-block-dirty-bitmap-add (this functionality will be
added by a future patch).

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-22-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:59 +02:00
Vladimir Sementsov-Ogievskiy
169b879359 qcow2: store bitmaps on reopening image as read-only
Store bitmaps and mark them read-only on reopening image as read-only.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-21-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
5f72826e7f qcow2: add persistent dirty bitmaps support
Store persistent dirty bitmaps in qcow2 image.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-20-vsementsov@virtuozzo.com
[mreitz: Always assign ret in store_bitmap() in case of an error]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
3dd10a06d1 block/dirty-bitmap: add bdrv_dirty_bitmap_next()
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-19-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
a88b179fdb block: introduce persistent dirty bitmaps
New field BdrvDirtyBitmap.persistent means that the bitmap should be saved
by the format driver in .bdrv_close and .bdrv_inactivate. No format driver
supports it for now.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170628120530.31251-18-vsementsov@virtuozzo.com
[mreitz: Fixed indentation]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
cca43ae1e1 block: bdrv_close: release bitmaps after drv->bdrv_close
Release bitmaps after 'if (bs->drv) { ... }' block. This will allow
format driver to save persistent bitmaps, which will appear in following
commits.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-17-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
a0319aacd4 block/dirty-bitmap: add autoload field to BdrvDirtyBitmap
Mirror AUTO flag from Qcow2 bitmap in BdrvDirtyBitmap. This will be
needed in future, to save this flag back to Qcow2 for persistent
bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170628120530.31251-16-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
1b6b0562db qcow2: support .bdrv_reopen_bitmaps_rw
Realize bdrv_reopen_bitmaps_rw interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-15-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
cb9ff6c25a block: new bdrv_reopen_bitmaps_rw interface
Add format driver handler, which should mark loaded read-only
bitmaps as 'IN_USE' in the image and unset read_only field in
corresponding BdrvDirtyBitmap's.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-14-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
50bf65bab6 block: refactor bdrv_reopen_commit
Add bs local variable to simplify code.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-13-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
d1258dd0c8 qcow2: autoloading dirty bitmaps
Auto loading bitmaps are bitmaps in Qcow2, with the AUTO flag set. They
are loaded when the image is opened and become BdrvDirtyBitmaps for the
corresponding drive.

Extra data in bitmaps is not supported for now.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-12-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
d6883bc968 block/dirty-bitmap: add readonly field to BdrvDirtyBitmap
It will be needed in following commits for persistent bitmaps.
If bitmap is loaded from read-only storage (and we can't mark it
"in use" in this storage) corresponding BdrvDirtyBitmap should be
read-only.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170628120530.31251-11-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
8bfc932e1e block/dirty-bitmap: fix comment for BlockDirtyBitmap.disabled field
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170628120530.31251-10-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:58 +02:00
Vladimir Sementsov-Ogievskiy
88ddffae8f qcow2: add bitmaps extension
Add bitmap extension as specified in docs/specs/qcow2.txt.
For now, just mirror extension header into Qcow2 state and check
constraints. Also, calculate refcounts for qcow2 bitmaps, to not break
qemu-img check.

For now, disable image resize if it has bitmaps. It will be fixed later.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-9-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Vladimir Sementsov-Ogievskiy
8a5bb1f114 qcow2-refcount: rename inc_refcounts() and make it public
This is needed for the following patch, which will introduce refcounts
checking for qcow2 bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-8-vsementsov@virtuozzo.com
[mreitz: s/inc_refcounts/qcow2_inc_refcounts_imrt/ in one more (new)
         place]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Vladimir Sementsov-Ogievskiy
6bdc8b719a block/dirty-bitmap: add deserialize_ones func
Add bdrv_dirty_bitmap_deserialize_ones() function, which is needed for
qcow2 bitmap loading, to handle unallocated bitmap parts, marked as
all-ones.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-7-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Vladimir Sementsov-Ogievskiy
ba06ff1a5c block: fix bdrv_dirty_bitmap_granularity signature
Make getter signature const-correct. This allows other functions with
a const dirty bitmap parameter to use bdrv_dirty_bitmap_granularity().

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20170628120530.31251-6-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Vladimir Sementsov-Ogievskiy
eedc4b6d8d tests: add hbitmap iter test
Test that hbitmap iter is resistant to bitmap resetting.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-5-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Vladimir Sementsov-Ogievskiy
f63ea4e92b hbitmap: improve dirty iter
Make dirty iter resistant to resetting bits in corresponding HBitmap.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-4-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Vladimir Sementsov-Ogievskiy
b348c262cc specs/qcow2: do not use wording 'bitmap header'
A bitmap directory entry is sometimes called a 'bitmap header'. This
patch leaves only one name - 'bitmap directory entry'. The name 'bitmap
header' creates misunderstandings with 'qcow2 header' and 'qcow2 bitmap
header extension' (which is an extension of the qcow2 header).

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-3-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Vladimir Sementsov-Ogievskiy
b5d1f15488 specs/qcow2: fix bitmap granularity qemu-specific note
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 20170628120530.31251-2-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
sochin.jiang
5ce6bfe255 mirror: Fix inconsistent backing AioContext for after mirroring
mirror_complete opens the backing chain, which should have the same
AioContext as the top when using iothreads. Make the code guarantee
this, which fixes a failed assertion in bdrv_attach_child.

Signed-off-by: sochin.jiang <sochin.jiang@huawei.com>
Message-id: 1498475064-39816-1-git-send-email-sochin.jiang@huawei.com
[mreitz: Reworded commit message]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Max Reitz
a9ed6a9193 iotests: 181 does not work for all formats
Test 181 only works for formats which support live migration (naturally,
as it is a live migration test). Disable it for all formats which do
not.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170621131157.16584-1-mreitz@redhat.com
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Daniel P. Berrange
12f7efd02e docs: document encryption options for qcow, qcow2 and luks
Expand the image format docs to cover the new options for
the qcow, qcow2 and luks disk image formats

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-21-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Daniel P. Berrange
0a12f6f80e qcow2: report encryption specific image information
Currently 'qemu-img info' reports a simple "encrypted: yes"
field. This is not very useful now that qcow2 can support
multiple encryption formats. Users want to know which format
is in use and some data related to it.

Wire up usage of the qcrypto_block_get_info() method so that
'qemu-img info' can report about the encryption format
and parameters in use

  $ qemu-img create \
      --object secret,id=sec0,data=123456 \
      -o encrypt.format=luks,encrypt.key-secret=sec0 \
      -f qcow2 demo.qcow2 1G
  Formatting 'demo.qcow2', fmt=qcow2 size=1073741824 \
  encryption=off encrypt.format=luks encrypt.key-secret=sec0 \
  cluster_size=65536 lazy_refcounts=off refcount_bits=16

  $ qemu-img info demo.qcow2
  image: demo.qcow2
  file format: qcow2
  virtual size: 1.0G (1073741824 bytes)
  disk size: 480K
  encrypted: yes
  cluster_size: 65536
  Format specific information:
      compat: 1.1
      lazy refcounts: false
      refcount bits: 16
      encrypt:
          ivgen alg: plain64
          hash alg: sha256
          cipher alg: aes-256
          uuid: 3fa930c4-58c8-4ef7-b3c5-314bb5af21f3
          format: luks
          cipher mode: xts
          slots:
              [0]:
                  active: true
                  iters: 1839058
                  key offset: 4096
                  stripes: 4000
              [1]:
                  active: false
                  key offset: 262144
              [2]:
                  active: false
                  key offset: 520192
              [3]:
                  active: false
                  key offset: 778240
              [4]:
                  active: false
                  key offset: 1036288
              [5]:
                  active: false
                  key offset: 1294336
              [6]:
                  active: false
                  key offset: 1552384
              [7]:
                  active: false
                  key offset: 1810432
          payload offset: 2068480
          master key iters: 438487
      corrupt: false

With the legacy "AES" encryption we just report the format
name

  $ qemu-img create \
      --object secret,id=sec0,data=123456 \
      -o encrypt.format=aes,encrypt.key-secret=sec0 \
      -f qcow2 demo.qcow2 1G
  Formatting 'demo.qcow2', fmt=qcow2 size=1073741824 \
  encryption=off encrypt.format=aes encrypt.key-secret=sec0 \
  cluster_size=65536 lazy_refcounts=off refcount_bits=16

  $ ./qemu-img info demo.qcow2
  image: demo.qcow2
  file format: qcow2
  virtual size: 1.0G (1073741824 bytes)
  disk size: 196K
  encrypted: yes
  cluster_size: 65536
  Format specific information:
      compat: 1.1
      lazy refcounts: false
      refcount bits: 16
      encrypt:
          format: aes
      corrupt: false

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-20-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:57 +02:00
Daniel P. Berrange
1cd9a787a2 block: pass option prefix down to crypto layer
While the crypto layer uses a fixed option name "key-secret",
the upper block layer may have a prefix on the options. e.g.
"encrypt.key-secret", in order to avoid clashes between crypto
option names & other block option names. To ensure the crypto
layer can report accurate error messages, we must tell it what
option name prefix was used.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-19-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
c01c214b69 block: remove all encryption handling APIs
Now that all encryption keys must be provided upfront via
the QCryptoSecret API and associated block driver properties
there is no need for any explicit encryption handling APIs
in the block layer. Encryption can be handled transparently
within the block driver. We only retain an API for querying
whether an image is encrypted or not, since that is a
potentially useful piece of metadata to report to the user.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-18-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
788cf9f8c8 block: rip out all traces of password prompting
Now that qcow & qcow2 are wired up to get encryption keys
via the QCryptoSecret object, nothing is relying on the
interactive prompting for passwords. All the code related
to password prompting can thus be ripped out.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-17-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
23f831c331 iotests: enable tests 134 and 158 to work with qcow (v1)
The 134 and 158 iotests exercise the legacy qcow2 aes encryption
code path and they work fine with qcow v1 too.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-16-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
426d52d88c qcow2: add iotests to cover LUKS encryption support
This extends the 087 iotest to cover LUKS encryption when doing
blockdev-add.

Two further tests are added to validate read/write of LUKS
encrypted images with a single file and with a backing file.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-15-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
4652b8f3e1 qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.

  # qemu-img create --object secret,data=123456,id=sec0 \
       -f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
       test.qcow2 10G

The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encrypt.format=aes'). e.g. the following are
equivalent:

  # qemu-img create --object secret,data=123456,id=sec0 \
       -f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
       test.qcow2 10G

  # qemu-img create --object secret,data=123456,id=sec0 \
       -f qcow2 -o encrypt.format=aes,encrypt.key-secret=sec0 \
       test.qcow2 10G

With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.

Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialization vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
7674b5754e qcow2: extend specification to cover LUKS encryption
Update the qcow2 specification to describe how the LUKS header is
placed inside a qcow2 file, when using LUKS encryption for the
qcow2 payload instead of the legacy AES-CBC encryption

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-13-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
b25b387fa5 qcow2: convert QCow2 to use QCryptoBlock for encryption
This converts the qcow2 driver to make use of the QCryptoBlock
APIs for encrypting image content, using the legacy QCow2 AES
scheme.

With this change it is now required to use the QCryptoSecret
object for providing passwords, instead of the current block
password APIs / interactive prompting.

  $QEMU \
    -object secret,id=sec0,file=/home/berrange/encrypted.pw \
    -drive file=/home/berrange/encrypted.qcow2,encrypt.key-secret=sec0

The test 087 could be simplified since there is no longer a
difference in behaviour when using blockdev_add with encrypted
images for the running vs stopped CPU state.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-12-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
446d306d23 qcow2: make qcow2_encrypt_sectors encrypt in place
Instead of requiring separate input/output buffers for
encrypting data, change qcow2_encrypt_sectors() to assume
use of a single buffer, encrypting in place. The current
callers all used the same buffer for input/output already.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-11-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
d85f4222b4 qcow: convert QCow to use QCryptoBlock for encryption
This converts the qcow driver to make use of the QCryptoBlock
APIs for encrypting image content. This is only wired up to
permit use of the legacy QCow encryption format. Users who wish
to have the strong LUKS format should switch to qcow2 instead.

With this change it is now required to use the QCryptoSecret
object for providing passwords, instead of the current block
password APIs / interactive prompting.

  $QEMU \
    -object secret,id=sec0,file=/home/berrange/encrypted.pw \
    -drive file=/home/berrange/encrypted.qcow,encrypt.format=aes,\
           encrypt.key-secret=sec0

Though note that running QEMU system emulators with the AES
encryption is no longer supported, so while the above syntax
is valid, QEMU will refuse to actually run the VM in this
particular example.

Likewise when creating images with the legacy AES-CBC format

  qemu-img create -f qcow \
    --object secret,id=sec0,file=/home/berrange/encrypted.pw \
    -o encrypt.format=aes,encrypt.key-secret=sec0 \
    /home/berrange/encrypted.qcow 64M

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-10-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
1fad1f9400 qcow: make encrypt_sectors encrypt in place
Instead of requiring separate input/output buffers for
encrypting data, change encrypt_sectors() to assume
use of a single buffer, encrypting in place. One current
caller uses the same buffer for input/output already
and the other two callers are easily converted to do so.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-9-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:56 +02:00
Daniel P. Berrange
0cb8d47ba9 block: deprecate "encryption=on" in favor of "encrypt.format=aes"
Historically the qcow & qcow2 image formats supported a property
"encryption=on" to enable their built-in AES encryption. We'll
soon be supporting LUKS for qcow2, so need a more general purpose
way to enable encryption, with a choice of formats.

This introduces an "encrypt.format" option, which will later be
joined by a number of other "encrypt.XXX" options. The use of
a "encrypt." prefix instead of "encrypt-" is done to facilitate
mapping to a nested QAPI schema at a later date.

e.g. the preferred syntax is now

  qemu-img create -f qcow2 -o encrypt.format=aes demo.qcow2

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-8-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:55 +02:00
Daniel P. Berrange
06af39ecf9 iotests: skip 048 with qcow which doesn't support resize
Test 048 is designed to verify data preservation during an
image resize. The qcow (v1) format impl has never supported
resize so always fails.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-7-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:55 +02:00
Daniel P. Berrange
ebab5636f9 iotests: skip 042 with qcow which doesn't support zero sized images
Test 042 is designed to verify operation with zero sized images.
Such images are not supported with qcow (v1), so this test has
always failed.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-6-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:55 +02:00
Daniel P. Berrange
6aa837f7bd qcow: require image size to be > 1 for new images
The qcow driver refuses to open images which are less than
2 bytes in size, but will happily create such images. Add
a check in the create path to avoid this discrepancy.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-5-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:55 +02:00
Daniel P. Berrange
0b4ee9090e qcow: document another weakness of qcow AES encryption
Document that use of guest virtual sector numbers as the basis for
the initialization vectors is a potential weakness, when combined
with internal snapshots or multiple images using the same passphrase.
This fixes the formatting of the itemized list too.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-4-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:55 +02:00
Daniel P. Berrange
4a47f85431 block: add ability to set a prefix for opt names
When integrating the crypto support with qcow/qcow2, we don't
want to use the bare LUKS option names "hash-alg", "key-secret",
etc. We need to namespace them to match the nested QAPI schema.

e.g. "encrypt.hash-alg", "encrypt.key-secret"

so that they don't clash with any general qcow options at a later
date.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-3-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:55 +02:00
Daniel P. Berrange
306a06e5f7 block: expose crypto option names / defs to other drivers
The block/crypto.c defines a set of QemuOpts that provide
parameters for encryption. This will also be needed by
the qcow/qcow2 integration, so expose the relevant pieces
in a new block/crypto.h header. Some helper methods taking
QemuOpts are changed to take QDict to simplify usage in
other places.

Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-2-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-07-11 17:44:55 +02:00
Peter Maydell
aa916e409c Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170711' into staging
ppc patch queue 2017-07-11

  * Several minor cleanups from Greg Kurz
  * Fix for migration of pseries-2.7 and earlier machine types
  * More reworking of the DRC hotplug code, fixing several problems
    though there are still more to go
  * Fixes for CPU family / alias handling on POWER9
  * Preliminary patches for POWER9 XIVE (new interrupt controller)
    support
  * Assorted other fixes

# gpg: Signature made Tue 11 Jul 2017 05:35:16 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.10-20170711:
  spapr: populate device tree depending on XIVE_EXPLOIT option
  spapr: introduce the XIVE_EXPLOIT option in CAS
  ppc/kvm: have the "family" CPU alias to point to TYPE_HOST_POWERPC_CPU
  spapr: Only report host/guest IOMMU page size mismatches on KVM
  spapr: fix memory hotplug error path
  target/ppc: Add debug function for radix mmu translation
  target/ppc: Refactor tcg radix mmu code
  spapr: Use unplug_request for PCI hot unplug
  spapr: Remove unnecessary differences between hotplug and coldplug paths
  spapr: Add DRC release method
  spapr: Uniform DRC reset paths
  spapr: Leave DR-indicator management to the guest
  target-ppc: SPR_BOOKE_ESR not set on FP exceptions
  spapr: fix migration to pseries machine < 2.8
  spapr: fix bogus function name in comment
  spapr: refresh "platform-specific" hcalls comment
  spapr: make spapr_populate_hotplug_cpu_dt() static

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-11 16:34:09 +01:00
Peter Maydell
a7a305aee1 include/hw/ptimer.h: Add documentation comments
Add documentation comments describing the public API of the
ptimer countdown timer.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 17:44:27 +03:00
Paolo Bonzini
44d619beb5 hxtool: remove dead -q option
This was used to extract .txt documentation for QMP.  This was
changed to use the QAPI schema instead, so zap it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 17:42:49 +03:00
Aurelien Jarno
9768e2abf7 mips/malta: load the initrd at the end of the low memory
Currently the malta board is loading the initrd just after the kernel.
This doesn't work for kaslr enabled kernels, as the initrd ends up being
overwritten.

Move the initrd to the end of the low memory, that should leave a
sufficient gap for kaslr.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Tested-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-07-11 15:06:34 +01:00
Miodrag Dinic
cab4888136 target/mips: fix msa copy_[s|u]_df rd = 0 corner case
This patch fixes the msa copy_[s|u]_df instruction emulation when
the destination register rd is zero. Without this patch the zero
register would get clobbered, which should never happen because it
is supposed to be hardwired to 0.

Fix this corner case by explicitly checking rd = 0 and effectively
making the emulation of these instructions a no-op in that case.

Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Acked-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-07-11 15:06:34 +01:00
Peter Maydell
29741be341 Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170710.0' into staging
VFIO fixes 2017-07-10

 - Don't iterate over non-realized devices (Alex Williamson)
 - Add PCIe capability version fixup (Alex Williamson)

# gpg: Signature made Mon 10 Jul 2017 20:06:11 BST
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-updates-20170710.0:
  vfio/pci: Fixup v0 PCIe capabilities
  vfio: Test realized when using VFIOGroup.device_list iterator

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-11 13:47:28 +01:00
Paolo Bonzini
b5ed2e11ef build: disable Xen on ARM
While ARM could present the xenpv machine, it does not and trying to enable
it breaks compilation.  Revert to the previous test which only looked at
$target_name, not $cpu.

Fixes: 3b6b75506d
Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170711100049.20513-1-pbonzini@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-11 11:23:47 +01:00
Peter Maydell
792dac309c target-arm: v7M: ignore writes to CONTROL.SPSEL from Thread mode
For v7M, writes to the CONTROL register are only permitted for
privileged code. However even if the code is privileged, the
write must not affect the SPSEL bit in the CONTROL register
if the CPU is in Thread mode (as documented in the pseudocode
for the MSR instruction). Implement this, instead of permitting
SPSEL to be written in all cases.

This was causing mbed applications not to run, because the
RTX RTOS they use relies on this behaviour.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1498820791-8130-1-git-send-email-peter.maydell@linaro.org
2017-07-11 11:21:26 +01:00
Alexander Graf
5d721b785f ARM: KVM: Enable in-kernel timers with user space gic
When running with KVM enabled, you can choose between emulating the
gic in kernel or user space. If the kernel supports in-kernel virtualization
of the interrupt controller, it will default to that. If not, it will
default to user space emulation.

Unfortunately when running in user mode gic emulation, we miss out on
interrupt events which are only available from kernel space, such as the timer.
This patch leverages the new kernel/user space pending line synchronization for
timer events. It does not handle PMU events yet.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-id: 1498577737-130264-1-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-11 11:21:26 +01:00
Joel Stanley
f986ee1d43 aspeed: Register all watchdogs
The ast2400 contains two and the ast2500 contains three watchdogs.
Add this information to the AspeedSoCInfo and realise the correct number
of watchdogs for each SoC type.

Signed-off-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-11 11:21:26 +01:00
Krzysztof Kozlowski
499ca13792 hw/misc: Add Exynos4210 Pseudo Random Number Generator
Add emulation for Exynos4210 Pseudo Random Number Generator which could
work on fixed seeds or with seeds provided by True Random Number
Generator block inside the SoC.

Implement only the fixed seeds part of it in polling mode (no
interrupts).

Emulation tested with two independent Linux kernel exynos-rng drivers:
1. New kcapi-rng interface (targeting Linux v4.12),
2. Old hwrng interface
   # echo "exynos" > /sys/class/misc/hw_random/rng_current
   # dd if=/dev/hwrng of=/dev/null bs=1 count=16

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Message-id: 20170425180609.11004-1-krzk@kernel.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: wrapped a few overlong lines; more efficient implementation
 of exynos4210_rng_seed_ready()]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-11 11:21:26 +01:00
Daniel P. Berrange
cbea0ac4fe backends: remove empty trace-events file
The content of the backends/trace-events file was entirely
removed in

  commit 6b10e573d1
  Author: Marc-André Lureau <marcandre.lureau@redhat.com>
  Date:   Mon May 29 12:39:42 2017 +0400

    char: move char devices to chardev/

Leaving the empty file around causes tracetool to generate
an empty .dtrace file which makes the dtrace compiler throw
a syntax error.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170629162046.4135-1-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-11 09:35:11 +01:00
Lluís Vilanova
f5956d71fb trace: Fix early setting of events with the "vcpu" property
Events with the "vcpu" property need to be set globally (i.e., as if they didn't
have that property) while we have not yet created any vCPU.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 149838891852.10366.11525912227070211356.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-07-11 09:35:11 +01:00
Li Ping
9bd8e9330a qga-win32: Fix memory leak of device information set
The caller of SetupDiGetClassDevs must delete the returned device information
 set when it is no longer needed by calling SetupDiDestroyDeviceInfoList.

Signed-off-by: Li Ping <li.ping288@zte.com.cn>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Philippe Mathieu-Daudé
32fb354b08 hw/core: fix missing return value in load_image_targphys_as()
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Philippe Mathieu-Daudé
c8e1158cf6 elf-loader: warn about invalid endianness
fprintf(stderr) is how errors are reported in this file.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Peter Maydell
e189091fba configure: Handle having no c++ compiler in FORTIFY_SOURCE check
Our FORTIFY_SOURCE check assumes that $cxx refers to a working C++
compiler, with the result that if you don't happen to have one
then configure will spuriously print
  configure: line 4685: c++: command not found

Fix this by adding a 'has $cxx' check.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Philippe Mathieu-Daudé
667675623d hw/pci: define msi_nonbroken in pci-stub
The kludged field 'msi_nonbroken' is declared in "hw/pci/msi.h" and defined in
hw/pci/msi.c.
When using an ARM config with CONFIG_PCI disabled, hw/pci/msi.c is not included.
Without being PCI-related, the files hw/intc/arm_gicv[23*].c do access this
field (to enable the kludge if PCI is enabled).
The final link fails since hw/pci/msi.c is not included.
Defining this field in pci-stub is safe enough for configs without CONFIG_PCI.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Philippe Mathieu-Daudé
8f7b1bd6ce hw/misc: add missing includes
inlined create_unimplemented_device() calls sysbus_mmio_map_overlap().

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Thomas Huth
5f37e6d4a7 configure: Fix build with pkg-config and --static --enable-sdl
The configure script prefers pkg-config over sdl-config, but
the "--static-libs" parameter only exists for the latter. With
pkg-config, "--static --libs" have to be used instead.

Buglink: https://bugs.launchpad.net/qemu/+bug/984516
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Mao Zhongyi
3bcf7d7df5 util/qemu-sockets: Drop unused helper socket_address_to_string()
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Max Filippov
aa5a2c0b3d target/xtensa: gdbstub: drop dead return statement
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-07-11 11:18:37 +03:00
Cédric Le Goater
b87680427e spapr: populate device tree depending on XIVE_EXPLOIT option
When XIVE is supported, the device tree should be populated
accordingly and the XIVE memory regions mapped to activate MMIOs.

Depending on the design we choose, we could also allocate different
ICS and ICP objects, or switch between objects. This needs to be
discussed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:02 +10:00
Cédric Le Goater
f2b14e3a9f spapr: introduce the XIVE_EXPLOIT option in CAS
On POWER9, the Client Architecture Support (CAS) negotiation process
determines whether the guest operates in XIVE Legacy compatibility
(the former POWER8 interrupt model) or in XIVE exploitation mode (the
newer POWER9 interrupt model).

Bit 7 of Byte 23 of vector 5 is used for this purpose.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:02 +10:00
Greg Kurz
92e926e1e3 ppc/kvm: have the "family" CPU alias to point to TYPE_HOST_POWERPC_CPU
When running KVM on POWER, we allow the user to pass "-cpu POWERx" instead
of "-cpu host". This is achieved by patching the ppc_cpu_aliases[] array
so that "POWERx" points to the CPU class with the same PVR as the host CPU.
This causes CPUs to be instantiated from this CPU class instead of the
TYPE_HOST_POWERPC_CPU class which is used with "-cpu host". These CPUs thus
miss all the KVM specific tuning from kvmppc_host_cpu_class_init().

This currently causes QEMU with "-cpu POWER9" to fail when running KVM on a
POWER9 DD1 host:

qemu-system-ppc64: Register sync failed... If you're using kvm-hv.ko, only
 "-cpu host" is possible
kvm_init_vcpu failed: Invalid argument

Let's have the "POWERx" alias to point to TYPE_HOST_POWERPC_CPU directly,
so that "-cpu POWERx" instantiates CPUs from the same class as "-cpu host".

Signed-off-by: Greg Kurz <groug@kaod.org>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:02 +10:00
David Gibson
2a0d90fed5 spapr: Only report host/guest IOMMU page size mismatches on KVM
We print a warning if the spapr IOMMU isn't configured to support a page
size matching the host page size backing RAM.  When that's the case we need
more complex logic to translate VFIO mappings, which is slower.

But, it's not so slow that it would be at all noticeable against the
general slowness of TCG.  So, only warn when using KVM.  This removes some
noisy and unhelpful warnings from make check on hosts with page sizes
which typically differ from those on POWER (e.g. Sparc).

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2017-07-11 11:04:02 +10:00
Greg Kurz
160bb67885 spapr: fix memory hotplug error path
QEMU shouldn't abort if spapr_add_lmbs()->spapr_drc_attach() fails.
Let's propagate the error instead, like it is done everywhere else
where spapr_drc_attach() is called.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:02 +10:00
Suraj Jitindar Singh
95cb065776 target/ppc: Add debug function for radix mmu translation
In target/ppc/mmu-hash64.c there already exists the function
ppc_hash64_get_phys_page_debug() to get the physical (real) address for
a given effective address in hash mode.

Implement the function ppc_radix64_get_phys_page_debug() to allow a real
address to be obtained for a given effective address in radix mode.
This is used when a debugger is attached to qemu.

Previously we just had a comment saying this is unimplemented which then
fell through to the default case and caused an abort due to
unrecognised mmu model as the default had no case for the V3 mmu, which
was misleading at best.

We reuse ppc_radix64_walk_tree() which is used by the radix fault
handler since the process of walking the radix tree is identical.

Reported-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:02 +10:00
Suraj Jitindar Singh
6a042827b6 target/ppc: Refactor tcg radix mmu code
The mmu-radix64.c file implements functions to enable the radix mmu
emulation in tcg mode. There is a function ppc_radix64_walk_tree() which
performs the radix tree walk and also implicitly checks the pte
protection.

Move the protection checking of the pte from the ppc_radix64_walk_tree()
function into the caller. This means the ppc_radix64_walk_tree() function
can be used without protection checking which is useful for debugging.

ppc_radix64_walk_tree() no longer needs to take the rwx and prot variables.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:02 +10:00
David Gibson
3340e5c4f2 spapr: Use unplug_request for PCI hot unplug
AIUI, ->unplug_request in the HotplugHandler is used for "soft"
unplug, where acknowledgement from the guest is required before
completing the unplug, whereas ->unplug is used for "hard" unplug
where qemu unilaterally removes the device, and the guest just has to
cope with its sudden absence.  For spapr we (correctly) use
->unplug_request for CPU and memory hot unplug but we use ->unplug for
PCI.

While I think it might be possible to support "hard" PCI unplug within
the PAPR model, that's not how it actually works now.  Although it's
called from ->unplug, the PCI unplug path will usually just mark the
device for removal, with completion of the unplug delayed until
userspace responds to the unplug notification. If the guest doesn't
respond as expected, that could delay the unplug completion arbitrarily
long.

To reflect that, change the PCI unplug path to be called from
->unplug_request.  We also rename spapr_phb_hot_plug_child() and
spapr_phb_hot_unplug_child() to spapr_pci_plug() and
spapr_pci_unplug_request() to more obviously reflect the callbacks they're
implementing.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-07-11 11:04:02 +10:00
David Gibson
5c1da81215 spapr: Remove unnecessary differences between hotplug and coldplug paths
spapr_drc_attach() has a 'coldplug' parameter which sets the DRC into
configured state initially, instead of the usual ISOLATED/UNUSABLE state.
It turns out this is unnecessary: although coldplugged devices do need to
be in CONFIGURED state once the guest starts, that will already be
accomplished by the reset code which will move DRCs for already plugged
devices into a coldplug equivalent state.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-07-11 11:04:01 +10:00
David Gibson
6b762f29a8 spapr: Add DRC release method
At the moment, spapr_drc_release() has an ugly switch on the DRC type to
call the right, device-specific release function.  This cleans it up by
doing that via a proper QOM method.

It's still arguably an abstraction violation for the DRC code to call into
the specific device code, but one mess at a time.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-07-11 11:04:01 +10:00
David Gibson
6caf3ac613 spapr: Uniform DRC reset paths
DRC objects have a regular device reset method.  However, it only gets
called in the usual way for PCI DRCs.  Because of where CPU and LMB DRCs
are in the QOM tree, their device reset method isn't automatically called.
So, the machine manually registers reset handlers to call device_reset().

This patch removes the device reset method, and instead always explicitly
registers the reset handler from realize().  This means the callers don't
have to worry about the two cases, and we always get proper resets.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-07-11 11:04:01 +10:00
David Gibson
f8dc29834c spapr: Leave DR-indicator management to the guest
The DR-indicator is essentially a "virtual LED" attached to a hotpluggable
device, which the guest can set to various states for the attention of
the operator or management layers.

It's mostly guest managed, except that we once-off set it to
ACTIVE/INACTIVE in the attach/detach path.  While that makes certain sense,
there's no indication in PAPR that the hypervisor should do this, and the
drmgr code on the guest side doesn't appear to need it (it will already set
the indicator to ACTIVE on hotplug, and INACTIVE on remove).

So, leave the DR-indicator entirely to the guest; the only thing we need
to do is ensure it's in a sane state on reset.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-07-11 11:04:01 +10:00
Aaron Larson
0ee604abce target-ppc: SPR_BOOKE_ESR not set on FP exceptions
Properly set the book E exception syndrome register when a floating
point exception occurs.

Currently on a book E processor, the POWERPC_EXCP_FP exception handler
fails to set "env->spr[SPR_BOOKE_ESR] = ESR_FP;" as required by the
book E specification.

Signed-off-by: Aaron Larson <alarson@ddci.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:01 +10:00
Laurent Vivier
e806b4db14 spapr: fix migration to pseries machine < 2.8
Since commit 5c4537bd ("spapr: Fix 2.7<->2.8 migration of PCI host bridge"),
some migration fields are forged from the new ones in spapr_pci_pre_save().

It works well, except when the number of MSI devices is 0,
because in this case the function exits immediately.

This fix moves the migration code before the exit code.

The problem can be reproduced with these commands:

source qemu-2.9:

    qemu-system-ppc64 -monitor stdio -M pseries-2.6 -nodefaults -S

destination qemu-2.6:

    qemu-system-ppc64 -monitor stdio -M pseries-2.6 -nodefaults \
                      -incoming tcp:0:4444

on the source:

    migrate tcp:localhost:4444

Destination fails with the following error:

    qemu-system-ppc64: error while loading state for
                       instance 0x0 of device 'spapr_pci'
    qemu-system-ppc64: load of migration failed: Invalid argument

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:01 +10:00
Greg Kurz
f3728f9cbb spapr: fix bogus function name in comment
$ git grep spapr_ppc_reset
hw/ppc/spapr.c: * as part of spapr_ppc_reset().

$ git grep ppc_spapr_reset
hw/ppc/spapr.c:static void ppc_spapr_reset(void)
hw/ppc/spapr.c:    mc->reset = ppc_spapr_reset;
hw/ppc/spapr_hcall.c:        /* If ppc_spapr_reset() did not set up a HPT
 but one is necessary

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:01 +10:00
Greg Kurz
498cd99544 spapr: refresh "platform-specific" hcalls comment
We have more of these since the addition of KVMPPC_H_LOGICAL_MEMOP in 2012.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:01 +10:00
Greg Kurz
04d0ffbd52 spapr: make spapr_populate_hotplug_cpu_dt() static
Since commit ff9006ddbf ("spapr: move spapr_core_[foo]plug() callbacks
close to machine code in spapr.c"), this function doesn't need to be extern
anymore.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-07-11 11:04:01 +10:00
Peter Maydell
3d0bf8dfdf Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170710a' into staging
Migration pull 2017-07-10

# gpg: Signature made Mon 10 Jul 2017 18:04:57 BST
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20170710a:
  migration: Make compression_threads use save/load_setup/cleanup()
  migration: Convert ram to use new load_setup()/load_cleanup()
  migration: Create load_setup()/cleanup() methods
  migration: Rename cleanup() to save_cleanup()
  migration: Rename save_live_setup() to save_setup()
  doc: update TYPE_MIGRATION documents
  doc: add item for "-M enforce-config-section"
  vl: move global property, migrate init earlier
  migration: fix handling for --only-migratable

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-10 18:13:03 +01:00
Juan Quintela
f0afa331ce migration: Make compression_threads use save/load_setup/cleanup()
Once there, be consistent and use
compress_thread_{save,load}_{setup,cleanup}.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170628095228.4661-6-quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Juan Quintela
f265e0e437 migration: Convert ram to use new load_setup()/load_cleanup()
Once there, I rename ram_migration_cleanup() to ram_save_cleanup().
Notice that this is the first pass, and I only passed XBZRLE to the
new scheme.  Moved decoded_buf to inside XBZRLE struct.
As a bonus, I don't have to export xbzrle functions from ram.c.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

--

loaded_data pointer was needed because the callee can change it (dave)
spell loaded correctly in comment (dave)
Message-Id: <20170628095228.4661-5-quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Juan Quintela
acb5ea8697 migration: Create load_setup()/cleanup() methods
We need to do things at load time and at cleanup time.

Signed-off-by: Juan Quintela <quintela@redhat.com>

--

Move the printing of the error message so we can print the device
giving the error.
Add call to postcopy stuff
Message-Id: <20170628095228.4661-4-quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Juan Quintela
70f794fcfa migration: Rename cleanup() to save_cleanup()
We need a cleanup for loads, so we rename here to be consistent.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

--

Rename htab_cleanup to htab_save_cleanup as per Dave's suggestion
Message-Id: <20170628095228.4661-3-quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Juan Quintela
9907e842d7 migration: Rename save_live_setup() to save_setup()
We are going to use it now for more than save live regions.
Once there rename qemu_savevm_state_begin() to qemu_savevm_state_setup().

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170628095228.4661-2-quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Peter Xu
c8d3ff384f doc: update TYPE_MIGRATION documents
[Peter collected Eduardo's patch comment and formatted into patch]

Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1499242883-2184-5-git-send-email-peterx@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Peter Xu
16f7244842 doc: add item for "-M enforce-config-section"
It's never documented, and now we have one more parameter for it (which
obsoletes this one). Document it properly.

Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1499396048-21657-1-git-send-email-peterx@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  Removed 'Although now' commit message as per Eduardo's review
2017-07-10 17:52:21 +01:00
Peter Xu
00b8ea4e34 vl: move global property, migrate init earlier
Currently drive_init_func() may call migrate_get_current() while the
migrate object is still not ready yet at that time. Move the migration
object init earlier, along with the global properties, right after
acceleration init.

This fixes a breakage for iotest 055, which caused an assertion failure.

Reported-by: Max Reitz <mreitz@redhat.com>
Reported-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Tested-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Fixes: 3df663 ("migration: move only_migratable to MigrationState")
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1499242883-2184-3-git-send-email-peterx@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Peter Xu
b605c47b57 migration: fix handling for --only-migratable
MigrationState object is not ready at that time, so we'll get an
assertion. Use qemu_global_option() instead.

Reported-by: Eduardo Habkost <ehabkost@redhat.com>
Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Fixes: 3df663e ("migration: move only_migratable to MigrationState")
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1499242883-2184-2-git-send-email-peterx@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-07-10 17:52:21 +01:00
Alex Williamson
47985727e3 vfio/pci: Fixup v0 PCIe capabilities
Intel 82599 VFs report a PCIe capability version of 0, which is
invalid.  The earliest version of the PCIe spec used version 1.  This
causes Windows to fail startup on the device and it will be disabled
with error code 10.  Our choices are either to drop the PCIe cap on
such devices, which has the side effect of likely preventing the guest
from discovering any extended capabilities, or performing a fixup to
update the capability to the earliest valid version.  This implements
the latter.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-07-10 10:39:43 -06:00
Alex Williamson
7da624e26a vfio: Test realized when using VFIOGroup.device_list iterator
VFIOGroup.device_list is effectively our reference tracking mechanism
such that we can teardown a group when all of the device references
are removed.  However, we also use this list from our machine reset
handler for processing resets that affect multiple devices.  Generally
device removals are fully processed (exitfn + finalize) when this
reset handler is invoked, however if the removal is triggered via
another reset handler (piix4_reset->acpi_pcihp_reset) then the device
exitfn may run, but not finalize.  In this case we hit asserts when
we start trying to access PCI helpers since much of the PCI state of
the device is released.  To resolve this, add a pointer to the Object
DeviceState in our common base-device and skip non-realized devices
as we iterate.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-07-10 10:39:43 -06:00
Peter Maydell
6b06e3e49e Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2017-07-10-v2' into staging
nbd patches for 2017-07-10

- Eric Blake: MAINTAINERS: Promote NBD to supported, with new maintainer
- Vladimir Sementsov-Ogievskiy: [00/10] nbd refactoring part 2

# gpg: Signature made Mon 10 Jul 2017 15:59:18 BST
# gpg:                using RSA key 0xA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>"
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>"
# gpg:                 aka "[jpeg image of size 6874]"
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2017-07-10-v2:
  nbd: use generic trace subsystem instead of TRACE macro
  nbd: refactor tracing
  nbd/server: rename clientflags var in nbd_negotiate_options
  nbd/server: fix TRACE in nbd_negotiate_send_rep_len
  nbd/client: refactor TRACE of NBD_MAGIC
  nbd/common: nbd_tls_handshake: remove extra TRACE
  nbd/server: add errp to nbd_send_reply()
  nbd/server: use errp instead of LOG
  nbd/server: refactor nbd_negotiate
  nbd/server: nbd_negotiate: return 1 on NBD_OPT_ABORT
  MAINTAINERS: Promote NBD to supported, with new maintainer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-10 16:12:47 +01:00
Vladimir Sementsov-Ogievskiy
9588463e74 nbd: use generic trace subsystem instead of TRACE macro
Let NBD use the trace mechanisms already present in qemu. Now you can
use the -trace option of qemu, or the -T/--trace option of qemu-img,
qemu-io, and qemu-nbd, to select nbd traces. For qemu, the QMP commands
trace-event-{get,set}-state can also toggle tracing on the fly.

Example:
   qemu-nbd --trace 'nbd_*' <image file> # enables all nbd traces

Recompilation with CFLAGS=-DDEBUG_NBD is no longer needed; furthermore,
the DEBUG_NBD macro is removed from the code.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170707152918.23086-11-vsementsov@virtuozzo.com>
[eblake: minor tweaks to a couple of traces]
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
6fb2b9726c nbd: refactor tracing
Reorganize traces: move, reword, add information, drop extra ones.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170707152918.23086-10-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
7f9039cdaa nbd/server: rename clientflags var in nbd_negotiate_options
Rename 'clientflags' to just 'option'. This variable has nothing to do
with flags, but is a single integer representing the option requested
by the client.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170707152918.23086-9-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
4875196163 nbd/server: fix TRACE in nbd_negotiate_send_rep_len
Fix wrong order of TRACE arguments.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170707152918.23086-8-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
458d7a6939 nbd/client: refactor TRACE of NBD_MAGIC
We are going to switch from TRACE macro to trace points,
this TRACE complicates things, this patch simplifies it.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170707152918.23086-7-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
3e6bb543c2 nbd/common: nbd_tls_handshake: remove extra TRACE
Error is propagated to the caller, TRACE is not needed.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707152918.23086-6-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
c7b9728250 nbd/server: add errp to nbd_send_reply()
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707152918.23086-5-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
2fd2c8407e nbd/server: use errp instead of LOG
Move to modern errp scheme from just LOGging errors.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170707152918.23086-4-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
76ff081d91 nbd/server: refactor nbd_negotiate
Combine two successive "if (oldStyle) {...} else {...}" into one.

Block "if (client->tlscreds)" under "if (oldStyle)" is unreachable,
as we have "oldStyle = client->exp != NULL && !client->tlscreds;".
So, delete this block.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170707152918.23086-3-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Vladimir Sementsov-Ogievskiy
1e120ffead nbd/server: nbd_negotiate: return 1 on NBD_OPT_ABORT
Separate the case when a client sends NBD_OPT_ABORT from all other
errors. It will be needed for the following patch, where errors will be
reported.
This particular case is not actually an error - it honestly follows the
NBD protocol. Therefore it should not be reported like an error.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707152918.23086-2-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2017-07-10 09:57:24 -05:00
Eric Blake
99c62e70aa MAINTAINERS: Promote NBD to supported, with new maintainer
We are promising more than just odd fixes, and Paolo is hoping
to offload the pull requests to me.  Also, enough of NBD is related
to the block layer that it is worth including qemu-block on patches.

While at it, include blockdev-nbd.c and qemu-nbd.texi in the set
of maintained files.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707182151.29872-1-eblake@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-10 09:56:01 -05:00
Peter Maydell
94c56652b9 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Mon 10 Jul 2017 12:26:44 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (40 commits)
  block: Make bdrv_is_allocated_above() byte-based
  block: Minimize raw use of bds->total_sectors
  block: Make bdrv_is_allocated() byte-based
  backup: Switch backup_run() to byte-based
  backup: Switch backup_do_cow() to byte-based
  backup: Switch block_backup.h to byte-based
  backup: Switch BackupBlockJob to byte-based
  block: Drop unused bdrv_round_sectors_to_clusters()
  mirror: Switch mirror_iteration() to byte-based
  mirror: Switch mirror_do_read() to byte-based
  mirror: Switch mirror_cow_align() to byte-based
  mirror: Update signature of mirror_clip_sectors()
  mirror: Switch mirror_do_zero_or_discard() to byte-based
  mirror: Switch MirrorBlockJob to byte-based
  commit: Switch commit_run() to byte-based
  commit: Switch commit_populate() to byte-based
  stream: Switch stream_run() to byte-based
  stream: Drop reached_end for stream_complete()
  stream: Switch stream_populate() to byte-based
  trace: Show blockjob actions via bytes, not sectors
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-10 14:06:49 +01:00
Eric Blake
51b0a48888 block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based.  In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.

Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated.  For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status.  Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated().  But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.

For ease of review, bdrv_is_allocated() was tackled separately.

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:07 +02:00
Eric Blake
c00716beb3 block: Minimize raw use of bds->total_sectors
bdrv_is_allocated_above() was relying on intermediate->total_sectors,
which is a field that can have stale contents depending on the value
of intermediate->has_variable_length.  An audit shows that we are safe
(we were first calling through bdrv_co_get_block_status() which in
turn calls bdrv_nb_sectors() and therefore just refreshed the current
length), but it's nicer to favor our accessor functions to avoid having
to repeat such an audit, even if it means refresh_total_sectors() is
called more frequently.

Suggested-by: John Snow <jsnow@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:07 +02:00
Eric Blake
d6a644bbfe block: Make bdrv_is_allocated() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based.  In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.

Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated.  For now,
the io.c layer still assert()s that all callers are sector-aligned
on input and that *pnum is sector-aligned on return to the caller,
but that can be relaxed when a later patch implements byte-based
block status.  Therefore, this code adds usages like
DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned
values, where the call might reasonably give non-aligned results
in the future; on the other hand, no rounding is needed for callers
that should just continue to work with byte alignment.

For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_is_allocated().  But
some code, particularly bdrv_commit(), gets a lot simpler because it
no longer has to mess with sectors; also, it is now possible to pass
NULL if the caller does not care how much of the image is allocated
beyond the initial offset.  Leave comments where we can further
simplify once a later patch eliminates the need for sector-aligned
requests through bdrv_is_allocated().

For ease of review, bdrv_is_allocated_above() will be tackled
separately.

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:07 +02:00
Eric Blake
6f8e35e241 backup: Switch backup_run() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Change the internal
loop iteration of backups to track by bytes instead of sectors
(although we are still guaranteed that we iterate by steps that
are cluster-aligned).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
03f5d60bbf backup: Switch backup_do_cow() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
function (no semantic change).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
f6ac207893 backup: Switch block_backup.h to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Continue by converting
the public interface to backup jobs (no semantic change), including
a change to CowRequest to track by bytes instead of cluster indices.

Note that this does not change the difference between the public
interface (starting point, and size of the subsequent range) and
the internal interface (starting and end points).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Xie Changlong <xiechanglong@cmss.chinamobile.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
cf79cdf662 backup: Switch BackupBlockJob to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Continue by converting an
internal structure (no semantic change), and all references to
tracking progress.  Drop a redundant local variable bytes_per_cluster.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
e8a81e9cad block: Drop unused bdrv_round_sectors_to_clusters()
Now that the last user [mirror_iteration()] has converted to using
bytes, we no longer need a function to round sectors to clusters.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
fb2ef7919b mirror: Switch mirror_iteration() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Change the internal
loop iteration of mirroring to track by bytes instead of sectors
(although we are still guaranteed that we iterate by steps that
are both sector-aligned and multiples of the granularity).  Drop
the now-unused mirror_clip_sectors().

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
ae4cc8777b mirror: Switch mirror_do_read() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
function, preserving all existing semantics, and adding one more
assertion that things are still sector-aligned (so that conversions
to sectors in mirror_read_complete don't need to round).

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
782d97efec mirror: Switch mirror_cow_align() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
function (no semantic change), and add mirror_clip_bytes() as a
counterpart to mirror_clip_sectors().  Some of the conversion is
a bit tricky, requiring temporaries to convert between units; it
will be cleared up in a following patch.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
931e52607f mirror: Update signature of mirror_clip_sectors()
Rather than having a void function that modifies its input
in-place as the output, change the signature to reduce a layer
of indirection and return the result.

Suggested-by: John Snow <jsnow@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
e6f2419389 mirror: Switch mirror_do_zero_or_discard() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Convert another internal
function (no semantic change).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
b436982f04 mirror: Switch MirrorBlockJob to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Continue by converting an
internal structure (no semantic change), and all references to the
buffer size.

Add an assertion that our use of s->granularity >> BDRV_SECTOR_BITS
(necessary for interaction with sector-based dirty bitmaps, until
a later patch converts those to be byte-based) does not suffer from
truncation problems.

[checkpatch has a false positive on use of MIN() in this patch]

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
317a6676a2 commit: Switch commit_run() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Change the internal
loop iteration of committing to track by bytes instead of sectors
(although we are still guaranteed that we iterate by steps that
are sector-aligned).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
d8a9858408 commit: Switch commit_populate() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Start by converting an
internal function (no semantic change).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
d535435f4a stream: Switch stream_run() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Change the internal
loop iteration of streaming to track by bytes instead of sectors
(although we are still guaranteed that we iterate by steps that
are sector-aligned).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
158c649257 stream: Drop reached_end for stream_complete()
stream_complete() skips the work of rewriting the backing file if
the job was cancelled, if data->reached_end is false, or if there
was an error detected (non-zero data->ret) during the streaming.
But note that in stream_run(), data->reached_end is only set if the
loop ran to completion, and data->ret is only 0 in two cases:
either the loop ran to completion (possibly by cancellation, but
stream_complete checks for that), or we took an early goto out
because there is no bs->backing.  Thus, we can preserve the same
semantics without the use of reached_end, by merely checking for
bs->backing (and logically, if there was no backing file, streaming
is a no-op, so there is no backing file to rewrite).

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
8493211c02 stream: Switch stream_populate() to byte-based
We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Start by converting an
internal function (no semantic change).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
5cb1a49e01 trace: Show blockjob actions via bytes, not sectors
Upcoming patches are going to switch to byte-based interfaces
instead of sector-based.  Even worse, trace_backup_do_cow_enter()
had a weird mix of cluster and sector indices.

The trace interface is low enough that there are no stability
guarantees, and therefore nothing wrong with changing our units,
even in cases like trace_backup_do_cow_skip() where we are not
changing the trace output.  So make the tracing uniformly use
bytes.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Eric Blake
f3e4ce4af3 blockjob: Track job ratelimits via bytes, not sectors
The user interface specifies job rate limits in bytes/second.
It's pointless to have our internal representation track things
in sectors/second, particularly since we want to move away from
sector-based interfaces.

Fix up a doc typo found while verifying that the ratelimit
code handles the scaling difference.

Repetition of expressions like 'n * BDRV_SECTOR_SIZE' will be
cleaned up later when functions are converted to iterate over
images by bytes rather than by sectors.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Thomas Huth
c616f16e0c blockdev: Print a warning for legacy drive options that belong to -device
We likely do not want to carry these legacy -drive options along forever.
Let's emit a deprecation warning for the -drive options that have a
replacement with the -device option, so that the (hopefully few) remaining
users are aware of this and can adapt their scripts / behaviour accordingly.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Daniel P. Berrange
6b4df54833 qemu-img: drop -e and -6 options from the 'create' & 'convert' commands
The '-e' and '-6' options to the 'create' & 'convert' commands were
"deprecated" in favour of the more generic '-o' option many years ago:

  commit eec77d9e71
  Author: Jes Sorensen <Jes.Sorensen@redhat.com>
  Date:   Tue Dec 7 17:44:34 2010 +0100

    qemu-img: Deprecate obsolete -6 and -e options

Except this was never actually a deprecation, which would imply giving
the user a warning while the functionality continues to work for a
number of releases before eventual removal. Instead the options were
immediately turned into an error + exit. Given that the functionality
is already broken, there's no point in keeping these pseudo-deprecation
messages around any longer.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Hervé Poussineau
8b544293ef vvfat: change OEM name to 'MSWIN4.1'
According to specification:
"'MSWIN4.1' is the recommended setting, because it is the setting least likely
to cause compatibility problems. If you want to put something else in here,
that is your option, but the result may be that some FAT drivers might not
recognize the volume."

Specification: "FAT: General overview of on-disk format" v1.03, page 9
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:06 +02:00
Hervé Poussineau
78f002c901 vvfat: handle KANJI lead byte 0xe5
Specification: "FAT: General overview of on-disk format" v1.03, page 23
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
6817efea3a vvfat: limit number of entries in root directory in FAT12/FAT16
FAT12/FAT16 root directory is two sectors in size, which allows only 512 directory entries.
Prevent QEMU startup if too many files exist, instead of overflowing root directory.

Also introduce variable root_entries, which will be required for FAT32.

Fixes: https://bugs.launchpad.net/qemu/+bug/1599539/comments/4
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
339cebcc01 vvfat: correctly generate numeric-tail of short file names
More specifically:
- try without numeric-tail only if LFN didn't have invalid short chars
- start at ~1 (instead of ~0)
- handle case if numeric tail is more than one char (ie > 10)

Windows 9x Scandisk no longer sees mismatches between short file names and
long file names for non-ASCII filenames.

Specification: "FAT: General overview of on-disk format" v1.03, page 31
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
0c36111f57 vvfat: correctly create base short names for non-ASCII filenames
More specifically, create short name from filename and change blacklist of
invalid chars to whitelist of valid chars.

Windows 9x also now correctly sees long file names of filenames containing a space,
but Scandisk still complains about mismatch between SFN and LFN.

[kwolf: Build fix for this intermediate patch (it included declarations
 for variables that are only used in the next patch) ]

Specification: "FAT: General overview of on-disk format" v1.03, pages 30-31
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
09ec4119fb vvfat: correctly create long names for non-ASCII filenames
Assume that input filename is encoded as UTF-8, so correctly create UTF-16 encoding.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
f82d92bb02 vvfat: always create . and .. entries at first and in that order
readdir() doesn't always return . and .. entries at first and in that order.
This leads to not creating them at first in the directory, which raises some
errors on file system checking utilities like MS-DOS Scandisk.

Specification: "FAT: General overview of on-disk format" v1.03, page 25

Fixes: https://bugs.launchpad.net/qemu/+bug/1599539
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
92e28d8220 vvfat: fix field names in FAT12/FAT16 and FAT32 boot sectors
Specification: "FAT: General overview of on-disk format" v1.03, pages 11-13
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
4dc705dc7e vvfat: introduce offset_to_bootsector, offset_to_fat and offset_to_root_dir
- offset_to_bootsector is the number of sectors up to FAT bootsector
- offset_to_fat is the number of sectors up to first File Allocation Table
- offset_to_root_dir is the number of sectors up to root directory sector

Replace first_sectors_number - 1 by offset_to_bootsector.
Replace first_sectors_number by offset_to_fat.
Replace faked_sectors by offset_to_rootdir.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
ad05b31857 vvfat: rename useless enumeration values
MODE_FAKED and MODE_RENAMED are not and were never used.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
5f5b29dfce vvfat: fix typos
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
d6a7e54ed3 vvfat: replace tabs by 8 spaces
This was a complete mess. On 2299 indented lines:
- 1329 were with spaces only
- 617 with tabulations only
- 353 with spaces and tabulations

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Hervé Poussineau
139921aaa7 vvfat: fix qemu-img map and qemu-img convert
- bs->total_sectors is the number of sectors of the whole disk
- s->sector_count is the number of sectors of the FAT partition

This fixes the following assert in qemu-img map:
qemu-img.c:2641: get_block_status: Assertion `nb_sectors' failed.

This also fixes an infinite loop in qemu-img convert.

Fixes: 4480e0f924
Fixes: https://bugs.launchpad.net/qemu/+bug/1599539
Cc: qemu-stable@nongnu.org
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Eric Blake
544daf6679 blkdebug: Support .bdrv_co_get_block_status
Without a passthrough status of BDRV_BLOCK_RAW, anything wrapped by
blkdebug appears 100% allocated as data.  Better is treating it the
same as the underlying file being wrapped.

Update iotest 177 for the new expected output.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Eric Blake
d5254033da block: Simplify use of BDRV_BLOCK_RAW
The lone caller that cares about a return of BDRV_BLOCK_RAW
(namely, io.c:bdrv_co_get_block_status) completely replaces the
return value, so there is no point in passing BDRV_BLOCK_DATA.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Eric Blake
81c219ac6c block: Guarantee that *file is set on bdrv_get_block_status()
We document that *file is valid if the return is not an error and
includes BDRV_BLOCK_OFFSET_VALID, but forgot to obey this contract
when a driver (such as blkdebug) lacks a callback.  Messed up in
commit 67a0fd2 (v2.6), when we added the file parameter.

Enhance qemu-iotest 177 to cover this, using a sequence that would
print garbage or even SEGV, because it was dereferencing through
uninitialized memory.  [The resulting test output shows that we
have less-than-ideal block status from the blkdebug driver, but
that's a separate fix coming up soon.]

Setting *file on all paths that return BDRV_BLOCK_OFFSET_VALID is
enough to fix the crash, but we can go one step further: always
setting *file, even on error, means that a broken caller that
blindly dereferences file without checking for error is now more
likely to get a reliable SEGV instead of randomly acting on garbage,
making it easier to diagnose such buggy callers.  Adding an
assertion that file is set where expected doesn't hurt either.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Eric Blake
64ebf55648 qemu-io: Don't die on second open
Most callback commands in qemu-io return 0 to keep the interpreter
loop running, or 1 to quit immediately.  However, open_f() just
passed through the return value of openfile(), which has different
semantics of returning 0 if a file was opened, or 1 on any failure.

As a result of mixing the return semantics, we are forcing the
qemu-io interpreter to exit early on any failures, which is rather
annoying when some of the failures are obviously trying to give
the user a hint of how to proceed (if we didn't then kill qemu-io
out from under the user's feet):

$ qemu-io
qemu-io> open foo
qemu-io> open foo
file open already, try 'help close'
$ echo $?
0

In general, we WANT openfile() to report failures, since it is the
function used in the form 'qemu-io -c "$something" no_such_file'
for performing one or more -c options on a single file, and it is
not worth attempting $something if the file itself cannot be opened.
So the solution is to fix open_f() to always return 0 (when we are
in interactive mode, even failure to open should not end the
session), and save the return value of openfile() for command line
use in main().

Note, however, that we do have some qemu-iotests that do 'qemu-io
-c "open file" -c "$something"'; such tests will now proceed to
attempt $something whether or not the open succeeded, the same way
as if the two commands had been attempted in interactive mode.  As
such, the expected output for those tests has to be modified.  But it
also means that it is now possible to use -c close and have a single
qemu-io command line operate on more than one file even without
using interactive mode.  Although the '-c open' action is a subtle
change in behavior, remember that qemu-io is for debugging purposes,
so as long as it serves the needs of qemu-iotests while still being
reasonable for interactive use, it should not be a problem that we
are changing tests to the new behavior.

This has been awkward since at least as far back as commit
e3aff4f, in 2009.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-10 13:18:05 +02:00
Peter Maydell
6580476a14 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170709' into staging
Queued TCG patches

# gpg: Signature made Mon 10 Jul 2017 08:31:44 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170709:
  tcg/mips: Bugfix for crash when running program with qemu-i386.
  util/cacheinfo: Fix warning generated by clang
  tcg/aarch64: Enable indirect jump path using LDR (literal)
  tcg/aarch64: Use ADRP+ADD to compute target address
  tcg/aarch64: Introduce and use long branch to register

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-10 12:11:34 +01:00
Peter Maydell
77d4722918 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20170707-tag' into staging
Xen 2017/07/07

# gpg: Signature made Fri 07 Jul 2017 19:21:22 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# gpg:                 aka "Stefano Stabellini <sstabellini@kernel.org>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20170707-tag:
  xen/pt: Fixup addr validation in xen_pt_pci_config_access_check
  xen-platform: Cleanup network infrastructure when emulated NICs are unplugged
  xenfb: remove xen_init_display "temporary" hack

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-10 10:29:11 +01:00
Jiang Biao
8b8d768f19 tcg/mips: Bugfix for crash when running program with qemu-i386.
When running a helloworld program with qemu-i386 in linux-user
mode on Loongson 3A3000, it will crash. This patch fixes the bug.

Signed-off-by: Jiang Biao <jiang.biao2@zte.com.cn>
Message-Id: <1499669979-25904-1-git-send-email-jiang.biao2@zte.com.cn>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-09 21:11:38 -10:00
Pranith Kumar
2ae96c157a util/cacheinfo: Fix warning generated by clang
Clang generates the following warning on aarch64 host:

  CC      util/cacheinfo.o
/home/pranith/qemu/util/cacheinfo.c:121:48: warning: value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths]
        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
                                               ^
/home/pranith/qemu/util/cacheinfo.c:121:28: note: use constraint modifier "w"
        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
                           ^~
                           %w0

Constraint modifier 'w' is not (yet?) accepted by gcc. Fix this by increasing the ctr size.

Tested-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170630153946.11997-1-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-09 21:10:23 -10:00
Pranith Kumar
2acee8b2b5 tcg/aarch64: Enable indirect jump path using LDR (literal)
This patch enables the indirect jump path using an LDR (literal)
instruction. It will be interesting to test and see which performs
better among the two paths.

CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170630143614.31059-3-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-09 21:10:23 -10:00
Pranith Kumar
b68686bd4b tcg/aarch64: Use ADRP+ADD to compute target address
We use ADRP+ADD to compute the target address for goto_tb. This patch
introduces the NOP instruction which is used to align the above
instruction pair so that we can use one atomic instruction to patch
the destination offsets.

CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170630143614.31059-2-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-09 21:10:23 -10:00
Pranith Kumar
23b7aa1d2a tcg/aarch64: Introduce and use long branch to register
We can use a branch to register instruction for exit_tb for offsets
greater than 128MB.

CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170630143614.31059-1-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-07-09 21:10:23 -10:00
Anoob Soman
4daf62594d xen/pt: Fixup addr validation in xen_pt_pci_config_access_check
xen_pt_pci_config_access_check checks if addr >= 0xFF. 0xFF is a valid
address and should not be ignored.

Signed-off-by: Anoob Soman <anoob.soman@citrix.com>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-07 11:13:10 -07:00
Ross Lagerwall
6c808651e3 xen-platform: Cleanup network infrastructure when emulated NICs are unplugged
When the guest unplugs the emulated NICs, cleanup the peer for each NIC
as it is not needed anymore. Most importantly, this allows the tap
interfaces which QEMU holds open to be closed and removed.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-07-07 11:11:12 -07:00
Stefano Stabellini
9f2130f58d xenfb: remove xen_init_display "temporary" hack
Initialize xenfb properly, as all other backends, from its own
"initialise" function.

Remove the dependency of vkbd on vfb: use qemu_console_lookup_by_index
to find the principal console (to get the size of the screen) instead of
relying on a vfb backend to be available (which adds a dependency
between the two).

Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
2017-07-07 11:10:03 -07:00
Peter Maydell
b113658675 Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20170706' into staging
s390x/kvm/migration: fixes, enhancements and cleanups

- new email address for Cornelia
- Fixes: 3270, flic, virtio-scsi-ccw, ipl
- Enhancements, cpumodel, migration

# gpg: Signature made Thu 06 Jul 2017 08:18:19 BST
# gpg:                using RSA key 0x117BBC80B5A61C7C
# gpg: Good signature from "Christian Borntraeger (IBM) <borntraeger@de.ibm.com>"
# Primary key fingerprint: F922 9381 A334 08F9 DBAB  FBCA 117B BC80 B5A6 1C7C

* remotes/borntraeger/tags/s390x-20170706:
  hw/s390x/ipl: Fix endianness problem with netboot_start_addr
  virtio-scsi-ccw: use ioeventfd even when KVM is disabled
  s390x: return unavailable features via query-cpu-definitions
  s390x/MAINTAINERS: Update my email address
  s390x: fix realize inheritance for kvm-flic
  s390x: fix error propagation in kvm-flic's realize
  s390x/3270: fix instruction interception handler
  s390x: vmstatify config migration for virtio-ccw

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-06 11:42:59 +01:00
Peter Maydell
67b9c5d4f3 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* qemu-thread portability improvement (Fam)
* virtio-scsi IOMMU fix (Jason)
* poisoning and common-obj-y cleanups (Thomas)
* initial Hypervisor.framework refactoring (Sergio)
* x86 TCG interrupt injection fixes (Wu Xiang, me)
* --disable-tcg support for x86 (Yang Zhong, me)
* various other bugfixes and cleanups (Daniel, Peter, Thomas)

# gpg: Signature made Wed 05 Jul 2017 08:12:56 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (42 commits)
  target/i386: add the CONFIG_TCG into Makefiles
  target/i386: add the tcg_enabled() in target/i386/
  target/i386: move TLB refill function out of helper.c
  target/i386: split cpu_set_mxcsr() and make cpu_set_fpuc() inline
  target/i386: make cpu_get_fp80()/cpu_set_fp80() static
  target/i386: move cpu_sync_bndcs_hflags() function
  tcg: add the CONFIG_TCG into Makefiles
  tcg: add CONFIG_TCG guards in headers
  exec: elide calls to tb_lock and tb_unlock
  tcg: move tb_lock out of translate-all.h
  tcg: add the tcg-stub.c file into accel/stubs/
  vapic: use tcg_enabled
  monitor: disable "info jit" and "info opcount" if !TCG
  tcg: make tcg_allowed global
  cpu: move interrupt handling out of translate-common.c
  tcg: move page_size_init() function
  vl: add tcg_enabled() for tcg related code
  vl: convert -tb-size to qemu_strtoul
  configure: add --disable-tcg configure option
  configure: early test for supported targets
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-06 10:15:09 +01:00
Thomas Huth
1045e3cdaf hw/s390x/ipl: Fix endianness problem with netboot_start_addr
The start address has to be stored in big endian byte order
in the iplb.ccw block for the guest.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1499268345-12552-1-git-send-email-thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 19:46:30 +02:00
QingFeng Hao
cda3c19ff5 virtio-scsi-ccw: use ioeventfd even when KVM is disabled
This patch is based on a similar patch from Stefan Hajnoczi -
commit c324fd0a39 ("virtio-pci: use ioeventfd even when KVM is disabled")

Do not check kvm_eventfds_enabled() when KVM is disabled since it
always returns 0.  Since commit 8c56c1a592
("memory: emulate ioeventfd") it has been possible to use ioeventfds in
qtest or TCG mode.

This patch makes -device virtio-scsi-ccw,iothread=iothread0 work even
when KVM is disabled.
Currently we don't have an equivalent to "memory: emulate ioeventfd"
for ccw yet, but this doesn't hurt and qemu-iotests 068 can pass with
skipping iothread arguments.

I have tested that virtio-scsi-ccw works under tcg both with and without
iothread.

This patch fixes qemu-iotests 068, which was accidentally merged early
despite the dependency on ioeventfd.

Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Message-Id: <20170704132350.11874-2-haoqf@linux.vnet.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 19:45:02 +02:00
Viktor Mihajlovski
38cba1f4d8 s390x: return unavailable features via query-cpu-definitions
The response for query-cpu-definitions didn't include the
unavailable-features field, which is used by libvirt to figure
out whether a certain cpu model is usable on the host.

The unavailable features are now computed by obtaining the host CPU
model and comparing it against the known CPU models. The comparison
takes into account the generation, the GA level and the feature
bitmaps. In the case of a CPU generation/GA level mismatch
a feature called "type" is reported to be missing.

As a result, the output of virsh domcapabilities would change
from something like
 ...
     <mode name='custom' supported='yes'>
      <model usable='unknown'>z10EC-base</model>
      <model usable='unknown'>z9EC-base</model>
      <model usable='unknown'>z196.2-base</model>
      <model usable='unknown'>z900-base</model>
      <model usable='unknown'>z990</model>
 ...
to
 ...
     <mode name='custom' supported='yes'>
      <model usable='yes'>z10EC-base</model>
      <model usable='yes'>z9EC-base</model>
      <model usable='no'>z196.2-base</model>
      <model usable='yes'>z900-base</model>
      <model usable='yes'>z990</model>
 ...

Signed-off-by: Viktor Mihajlovski <mihajlov@linux.vnet.ibm.com>
Message-Id: <1499082529-16970-1-git-send-email-mihajlov@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 19:44:24 +02:00
Cornelia Huck
c1976ae7a2 s390x/MAINTAINERS: Update my email address
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Message-Id: <20170704092215.13742-2-cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 19:44:24 +02:00
Halil Pasic
5cbab1bfde s390x: fix realize inheritance for kvm-flic
Commit f6f4ce4211 ("s390x: add property adapter_routes_max_batch",
2016-12-09) introduces a common realize (intended to be common for all
the subclasses) for flic, but fails to make sure the kvm-flic which had
its own is actually calling this common realize.

This omission fortunately does not result in a grave problem. The common
realize was only supposed to catch a possible programming mistake by
validating a value of a property set via the compat machine macros. Since
there was no programming mistake we don't need this fixed for stable.

Let's fix this problem by making sure kvm flic honors the realize of its
parent class.

Let us also improve on the error message we would hypothetically emit
when the validation fails.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Fixes: f6f4ce4211 ("s390x: add property adapter_routes_max_batch")
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Reviewed-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 19:44:23 +02:00
Halil Pasic
f62f210943 s390x: fix error propagation in kvm-flic's realize
From the moment it was introduced by commit a2875e6f98 ("s390x/kvm:
implement floating-interrupt controller device", 2013-07-16) the kvm-flic
is not making realize fail properly in case it's impossible to create the
KVM device which basically serves as a backend and is absolutely
essential for having an operational kvm-flic.

Let's fix this by making sure we do proper error propagation in realize.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Fixes: a2875e6f98 "s390x/kvm: implement floating-interrupt controller device"
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Reviewed-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 19:44:23 +02:00
Dong Jia Shi
1728cff2ab s390x/3270: fix instruction interception handler
Commit bab482d740 ("s390x/css: ccw translation infrastructure")
introduced instruction interception handler for different types of
subchannels. For emulated 3270 devices, we should assign the virtual
subchannel handler to them during device realization process, or 3270
will not work.

Fixes: bab482d740 ("s390x/css: ccw translation infrastructure")

Reviewed-by: Jing Liu <liujbjl@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 12:16:55 +02:00
Halil Pasic
517ff12c7d s390x: vmstatify config migration for virtio-ccw
Let's vmstatify virtio_ccw_save_config and virtio_ccw_load_config for
flexibility (extending using subsections) and for fun.

To achieve this we need to hack the config_vector, which is VirtIODevice
(that is common virtio) state, in the middle of the VirtioCcwDevice state
representation.  This is somewhat ugly, but we have no choice because the
stream format needs to be preserved.

Almost no changes in behavior. Exception is everything that comes with
vmstate like extra bookkeeping about what's in the stream, and maybe some
extra checks and better error reporting.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-Id: <20170703213414.94298-1-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-07-05 12:16:55 +02:00
Yang Zhong
44eff67341 target/i386: add the CONFIG_TCG into Makefiles
Add the CONFIG_TCG for frontend and backend's files in the related
Makefiles.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:12:44 +02:00
Yang Zhong
79c664f62d target/i386: add the tcg_enabled() in target/i386/
Add the tcg_enabled() where the x86 target needs to disable
TCG-specific code.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:12:44 +02:00
Paolo Bonzini
6578eb25a0 target/i386: move TLB refill function out of helper.c
This function calls tlb_set_page_with_attrs, which is not available
when TCG is disabled.  Move it to excp_helper.c.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:12:44 +02:00
Yang Zhong
1d8ad165b6 target/i386: split cpu_set_mxcsr() and make cpu_set_fpuc() inline
Split the cpu_set_mxcsr() and make cpu_set_fpuc() inline with specific
tcg code.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:12:44 +02:00
Yang Zhong
db573d2cf7 target/i386: make cpu_get_fp80()/cpu_set_fp80() static
Move cpu_get_fp80()/cpu_set_fp80() from fpu_helper.c to
machine.c because fpu_helper.c will be disabled if tcg is
disabled in the build.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:12:44 +02:00
Yang Zhong
ab0a19d4f0 target/i386: move cpu_sync_bndcs_hflags() function
Move cpu_sync_bndcs_hflags() function from mpx_helper.c
to helper.c because mpx_helper.c needs to be disabled when
tcg is disabled.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:12:44 +02:00
Yang Zhong
e4b4b6428c tcg: add the CONFIG_TCG into Makefiles
Add the CONFIG_TCG for frontend and backend's files in the related
Makefiles.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:12:44 +02:00
Yang Zhong
b11ec7f2e4 tcg: add CONFIG_TCG guards in headers
Add CONFIG_TCG around TLB-related functions and structure declarations.
Some of these functions are defined in ./accel/tcg/cputlb.c, which will
not be linked in if TCG is disabled, and have no stubs; therefore, their
callers will also be compiled out for --disable-tcg.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-05 09:11:08 +02:00
Paolo Bonzini
5aa1ef71b4 exec: elide calls to tb_lock and tb_unlock
Adding assertions fixes link errors.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:01:16 +02:00
Paolo Bonzini
beeaef55e4 tcg: move tb_lock out of translate-all.h
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:01:16 +02:00
Yang Zhong
a574cf9b41 tcg: add the tcg-stub.c file into accel/stubs/
If tcg is disabled, the functions in tcg-stub.c file will be called.
This file is a target-independent file; do not include any platform
related stub functions into this file.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:01:16 +02:00
Paolo Bonzini
24d90a3cfd vapic: use tcg_enabled
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:01:16 +02:00
Paolo Bonzini
f0d14a95a5 monitor: disable "info jit" and "info opcount" if !TCG
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:01:16 +02:00
Yang Zhong
8e2b72990e tcg: make tcg_allowed global
Change tcg_enabled() and make sure the user build still enables tcg
even if x86 softmmu disables tcg.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:01:16 +02:00
Paolo Bonzini
290dae4678 cpu: move interrupt handling out of translate-common.c
translate-common.c will not be available anymore with --disable-tcg,
so we cannot leave cpu_interrupt_handler there.

Move the TCG-specific handler to accel/tcg/tcg-all.c, and adopt
KVM's handler as the default one, since it works just as well for
Xen and qtest.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:00:43 +02:00
Yang Zhong
a0be0c585f tcg: move page_size_init() function
translate-all.c will be disabled if tcg is disabled in the build,
so page_size_init() function and related variables will be moved
to exec.c file.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 16:00:12 +02:00
Yang Zhong
e7b161d573 vl: add tcg_enabled() for tcg related code
Need to disable the tcg related code in the vl.c if the
disable-tcg option is added into ./configure command.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Paolo Bonzini
8b3ae692b8 vl: convert -tb-size to qemu_strtoul
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Paolo Bonzini
b3f6ea7e55 configure: add --disable-tcg configure option
This lets you build without TCG (hardware acceleration or qtest only).  When
this flag is passed to configure, it will automatically filter out the target
list to only those that support KVM or Xen or HAX.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Paolo Bonzini
d880a3ba7d configure: early test for supported targets
Check for unsupported targets in target_list, and print an
error early in the configuration process.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Paolo Bonzini
3b6b75506d configure: factor out list of supported Xen/KVM/HAX targets
This will be useful when the functions are called, early in the configure
process, to filter out targets that do not support hardware acceleration.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Paolo Bonzini
ee29bdb6a7 qemu-doc: do not refer to years-old version numbers
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Fam Zheng
c096358e74 qemu-thread: Assert locks are initialized before using
Not all platforms check whether a lock is initialized before used.  In
particular Linux seems to be more permissive than OSX.

Check initialization state explicitly in our code to catch such bugs
earlier.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170704122325.25634-1-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Jason Wang
025bdeab3c virtio-scsi: finalize IOMMU support
After converting to use DMA api for virtio devices, we should use
dma_as instead of address_space_memory. Otherwise it won't work if
IOMMU is enabled.

Fixes: commit 8607f5c307 ("virtio: convert to use DMA api")
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-Id: <1499170866-9068-1-git-send-email-jasowang@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Paolo Bonzini
e8c2091d4c checkpatch: should not use signal except for SIG_DFL or SIG_IGN
Using signal to establish a signal handler is not portable; on
SysV systems, the signal handler would be reset to SIG_DFL after
delivery, while BSD preserves the signal handler.  Daniel Berrange
reported that (to complicate matters further) the signal system call
has SysV behavior, but glibc signal() actually calls the sigaction
system call to provide BSD behavior.

However, using signal() to set a signal's disposition to SIG_DFL
or SIG_IGN is portable and is a relatively common occurrence in
QEMU source code, so allow that.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Peter Maydell
de5f852f38 main_loop: Make main_loop_wait() return void
The last users of main_loop_wait() that cared about the return value
have now been changed to no longer use it. Drop the now-useless return
value and make the function return void.

We avoid the awkwardness of ifdeffery to handle the 'ret'
variable in main_loop_wait() only being wanted if CONFIG_SLIRP
by simply dropping all the ifdefs. There are stub implementations
of slirp_pollfds_poll() and slirp_pollfds_fill() already in
stubs/slirp.c which do nothing, as required.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1498584769-12439-3-git-send-email-peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:28 +02:00
Peter Maydell
be59df797c tests/test-char.c: Don't use main_loop_wait()'s return value
In QEMU's main_loop() we used to check whether we should do
a nonblocking call to main_loop(); this was deleted in commit e330c118f2,
because now that vCPUs always drop the I/O thread lock it is an unnecessary
optimization.

The loop in test-char.c copied the old QEMU main_loop() code, but
here the nonblocking check has never been necessary because this
standalone test case doesn't hold the I/O lock anyway. Remove it,
so we can drop the main_loop_wait() return value.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <1498584769-12439-2-git-send-email-peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:27 +02:00
Alistair Francis
0ec7b53482 util/oslib-win32: Remove if conditional
The original ready < nhandles - 1 can be re-written as ready + 1 <
nhandles.  The check was actually incorrect because
WAIT_OBJECT_0 was not subtracted from ready; it worked because
WAIT_OBJECT_0 is zero.  After subtracting WAIT_OBJECT_0,
the result is the same condition that we are checking on the first
iteration of the for loop. This means we can remove the if statement
and let the for loop check the code.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <a14083d681951f3999a0e9314605cb706381ae8d.1498756113.git.alistair.francis@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:27 +02:00
Sergio Andres Gomez Del Real
86a57621ce xsave_helper: pull xsave and xrstor out of kvm.c into helper function
This patch pulls out of kvm.c and into the new files the implementation
for the xsave and xrstor instructions. This is so they can be shared by
kvm and hvf.

Signed-off-by: Sergio Andres Gomez Del Real <Sergio.G.DelReal@gmail.com>
Message-Id: <20170626200832.11058-1-Sergio.G.DelReal@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sergio Andres Gomez Del Real <sergio.g.delreal@gmail.com>
2017-07-04 14:39:27 +02:00
Daniel P. Berrange
56382bd577 sockets: avoid formatting buffer that may not be NUL terminated
The 'sun_path' field in the sockaddr_un struct is not required
to be NUL terminated, so when reporting an error, we must use
the separate 'path' variable which is guaranteed terminated.

Fixes a bug spotted by coverity that was introduced in

  commit ad9579aaa1
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Thu May 25 16:53:00 2017 +0100

    sockets: improve error reporting if UNIX socket path is too long

Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <20170626103756.22974-1-berrange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:27 +02:00
Thomas Huth
24f7ca4907 hw/misc/edu: Compile the edu device as common object
edu.c does not contain any target-specific code, so we can put
it into common-obj-y to compile it only once for all targets.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1498454578-18709-8-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:27 +02:00
Thomas Huth
5ddc64822b Makefile: Move bootdevice.o to common-obj-y
There does not seem to be any target specific code in this file, so
we can put it into "common-obj" instead of "obj" to compile it only
once for all targets.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1498454578-18709-7-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:27 +02:00
Thomas Huth
47507383c6 include/exec/poison: Mark CONFIG_SOFTMMU as poisoned
CONFIG_SOFTMMU should never be used in common code, so mark
it as poisoned, too.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1498454578-18709-6-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:39:11 +02:00
Thomas Huth
2cd5394311 cpu: Introduce a wrapper for tlb_flush() that can be used in common code
Commit 1f5c00cfdb ("qom/cpu: move tlb_flush to cpu_common_reset")
moved the call to tlb_flush() from the target-specific reset handlers
into the common code qom/cpu.c file, and protected the call with
"#ifdef CONFIG_SOFTMMU" to avoid that it is called for linux-user
only targets. But since qom/cpu.c is common code, CONFIG_SOFTMMU is
*never* defined here, so the tlb_flush() was simply never executed
anymore. Fix it by introducing a wrapper for tlb_flush() in a file
that is re-compiled for each target, i.e. in translate-all.c.

Fixes: 1f5c00cfdb
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1498454578-18709-5-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Thomas Huth
cbca3722a3 include/exec/poison: Mark CONFIG_KVM as poisoned, too
CONFIG_KVM is only defined for target-specific code, so nobody should
use it by accident in common code. To avoid such subtle bugs,
CONFIG_KVM is now marked as poisoned in common code. The header
include/sysemu/kvm.h is somewhat special since it is included
all over the place from common code, too, so we need some extra
logic via "#ifdef NEED_CPU_H" here to make sure that we can
compile all files without problems.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1498454578-18709-4-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Thomas Huth
2099935dbf Move CONFIG_KVM related definitions to kvm_i386.h
pc.h and sysemu/kvm.h are also included from common code (where
CONFIG_KVM is not available), so the #defines that depend on CONFIG_KVM
should not be declared here to avoid that anybody is using them in a
wrong way. Since we're also going to poison CONFIG_KVM for common code,
let's move them to kvm_i386.h instead. Most of the dummy definitions
from sysemu/kvm.h are also unused since the code that uses them is
only compiled for CONFIG_KVM (e.g. target/i386/kvm.c), so the unused
defines are also simply dropped here instead of being moved.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1498454578-18709-3-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Thomas Huth
50b8a2d326 include/exec/poison: Add some more missing TARGET and CONFIG defines
The defines of some *-linux-user targets were still missing.

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1498454578-18709-2-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Paolo Bonzini
1110bfe6f5 target/i386: simplify handling of conforming code segments on interrupt
Move the handling of conforming code segments before the handling
of stack switch.

Because dpl == cpl after the new "if", it's now unnecessary to check
the C bit when testing dpl < cpl.  Furthermore, dpl > cpl is checked
slightly above the modified code, so the final "else" is unreachable
and we can remove it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Wu Xiang
e95e9b88ba target/i386: fix interrupt CPL error when using ist in x86-64
In do_interrupt64(), when interrupt stack table(ist) is enabled
and the target code segment is conforming(e2 & DESC_C_MASK), the
old implementation always set new CPL to 0, and SS.RPL to 0.

This is incorrect because when CPL3 code accesses a CPL0 conforming code
segment, the CPL should remain unchanged. Otherwise higher privileged
code can be compromised.

The patch fixes this by always setting dpl = cpl when the target code segment
is conforming, and by modifying the last parameter `flags`, which contains the
correct new CPL, in cpu_x86_load_seg_cache().

Signed-off-by: Wu Xiang <willx8@gmail.com>
Message-Id: <20170621142152.GA18094@wxdeubuntu.ipads-lab.se.sjtu.edu.cn>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Paolo Bonzini
96d06835dc nbd: fix NBD over TLS
When attaching the NBD QIOChannel to an AioContext, the TLS channel should
be used, not the underlying socket channel.  This is because, trivially,
the TLS channel will be the one that we read/write to and thus the one
that will get the qio_channel_yield() call.

Fixes: ff82911cd3
Cc: qemu-stable@nongnu.org
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Tested-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Thomas Huth
abc67eb6e8 qemu-doc: Add missing "@c man end" statements
Since commit 3f2ce724f1 ("Move the qemu-ga description into a
separate chapter"), the qemu.1 man page looks pretty much screwed
up, e.g. the title was "qemu-ga - QEMU Guest Agent" instead of
"qemu-doc - QEMU Emulator User Documentation". However, that movement
of the qemu-ga chapter is not the real problem, it just triggered
another bug in the qemu-doc.texi: There are some parts in the file
which introduce a "@c man begin OPTIONS" section, but never close
it again with "@c man end". After adding the proper end tags here,
the title of the man page is right again and the previously wrongly
tagged sections now also show up correctly in the man page, too.

Reported-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1497863771-24929-1-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Sergio Andres Gomez Del Real
99f318322e vcpu_dirty: share the same field in CPUState for all accelerators
This patch simply replaces the separate boolean field in CPUState that
kvm, hax (and upcoming hvf) have for keeping track of vcpu dirtiness
with a single shared field.

Signed-off-by: Sergio Andres Gomez Del Real <Sergio.G.DelReal@gmail.com>
Message-Id: <20170618191101.3457-1-Sergio.G.DelReal@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-04 14:30:03 +02:00
Peter Maydell
2185c93ba8 Merge remote-tracking branch 'remotes/edgar/tags/edgar/xilinx-next.for-upstream' into staging
edgar/xilinx-next.for-upstream

# gpg: Signature made Tue 04 Jul 2017 10:00:47 BST
# gpg:                using RSA key 0x29C596780F6BCA83
# gpg: Good signature from "Edgar E. Iglesias (Xilinx key) <edgar.iglesias@xilinx.com>"
# gpg:                 aka "Edgar E. Iglesias <edgar.iglesias@gmail.com>"
# Primary key fingerprint: AC44 FEDC 14F7 F1EB EDBF  4151 29C5 9678 0F6B CA83

* remotes/edgar/tags/edgar/xilinx-next.for-upstream:
  xilinx-dp: Add support for the yuy2 video format
  target-microblaze: Add CPU version 10.0
  target-microblaze: dec_barrel: Add BSIFI
  target-microblaze: dec_barrel: Add BSEFI
  target-microblaze: dec_barrel: Plug TCG temp leak
  target-microblaze: dec_barrel: Add braces around if-statements
  target-microblaze: dec_barrel: Use extract32
  target-microblaze: dec_barrel: Use bool instead of unsigned int
  target-microblaze: Introduce a use-pcmp-instr property
  target-microblaze: Introduce a use-msr-instr property
  target-microblaze: Introduce a use-hw-mul property
  target-microblaze: Introduce a use-div property
  target-microblaze: Introduce a use-barrel property
  target-microblaze: Add CPU versions 9.4, 9.5 and 9.6
  target-microblaze: Don't hard code 0xb as initial MB version
  target-microblaze: Correct bit shift for the PVR0 version field
  disas/microblaze: Add missing 'const' attributes

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-04 13:05:30 +01:00
Peter Maydell
0c7a8b9baa Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, acpi, pci, virtio: fixes, cleanups, features, tests

Some fixes and cleanups. New tests.
Configurable tx queue size for virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Mon 03 Jul 2017 20:43:17 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (21 commits)
  i386/acpi: update expected acpi files
  virtio-net: fix tx queue size for !vhost-user
  tests: Add unit tests for the VM Generation ID feature
  vhost-user: unregister slave req handler at cleanup time
  vhost: ensure vhost_ops are set before calling iotlb callback
  intel_iommu: fix migration breakage on mr switch
  hw/acpi: remove dead acpi code
  fw_cfg: move setting of FW_CFG_VERSION_DMA bit to fw_cfg_init1()
  fw_cfg: don't map the fw_cfg IO ports in fw_cfg_io_realize()
  i386/kvm/pci-assign: Use errp directly rather than local_err
  i386/kvm/pci-assign: Fix return type of verify_irqchip_kernel()
  pci: Convert shpc_init() to Error
  pci: Convert to realize
  pci: Replace pci_add_capability2() with pci_add_capability()
  pci: Make errp the last parameter of pci_add_capability()
  pci: Fix the wrong assertion.
  pci: Add comment for pci_add_capability2()
  pci: Clean up error checking in pci_add_capability()
  intel_iommu: relax iq tail check on VTD_GCMD_QIE enable
  hw/pci-bridge/dec: Classify the DEC PCI bridge as bridge device
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-04 11:17:02 +01:00
Edgar E. Iglesias
31cf950ea2 xilinx-dp: Add support for the yuy2 video format
Add support for the yuy2 video format.

Reviewed-by: KONRAD Frederic <frederic.konrad@adacore.com>
Acked-by: Sai Pavan Boddu <saipava@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
feac83af3b target-microblaze: Add CPU version 10.0
Add CPU version 10.0.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
d09b2585f2 target-microblaze: dec_barrel: Add BSIFI
Add support for BSIFI.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
faa48d742c target-microblaze: dec_barrel: Add BSEFI
Add support for BSEFI.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
5c8f44b7db target-microblaze: dec_barrel: Plug TCG temp leak
Plug TCG temp leak.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
2acf6d539c target-microblaze: dec_barrel: Add braces around if-statements
Add braces around if-statements.
No functional change.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
e3e84983fb target-microblaze: dec_barrel: Use extract32
Use extract32 instead of opencoding the shifting and masking.
No functional change.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
bc54e71e0c target-microblaze: dec_barrel: Use bool instead of unsigned int
Use bool instead of unsigned int to represent flags.
No functional change.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
8fc5239e1f target-microblaze: Introduce a use-pcmp-instr property
Introduce a use-pcmp-instr property making pcmp instructions
optional.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
5683750909 target-microblaze: Introduce a use-msr-instr property
Introduce a use-msr-instr property making msr instructions
optional.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
9b9643181a target-microblaze: Introduce a use-hw-mul property
Introduce a use-hw-mul property making multiplication instructions
optional.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
47709e4c66 target-microblaze: Introduce a use-div property
Introduce a use-div property making division instructions
optional.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
7faa66aaf8 target-microblaze: Introduce a use-barrel property
Introduce a use-barrel property making barrel shifter instructions
optional.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
d79fcbc298 target-microblaze: Add CPU versions 9.4, 9.5 and 9.6
Add CPU versions 9.4, 9.5 and 9.6.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
3e92250589 target-microblaze: Don't hard code 0xb as initial MB version
Don't hard code 0xb as initial MB version.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Edgar E. Iglesias
79549c9960 target-microblaze: Correct bit shift for the PVR0 version field
Correct bit shift for the PVR0 version field.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Stefan Weil
9a32e6f3a1 disas/microblaze: Add missing 'const' attributes
Making the opcode list 'const' saves memory.
Some function arguments and local variables needed 'const', too.

Add also 'static' to two local functions.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
[EI: Removed old prototypes to fix the build]
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-07-04 09:22:20 +02:00
Michael S. Tsirkin
d2f9ca9416 i386/acpi: update expected acpi files
We dropped some dead code, update expected table binaries.

Fixes: 4d7e7f2702 ("hw/acpi: remove dead acpi code")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:42:36 +03:00
Michael S. Tsirkin
2eef278b9e virtio-net: fix tx queue size for !vhost-user
Current code segfaults when no nic peer is specified.
Fix it up - fall back to default queue size.

Fixes: 9b02e1618c ("virtio-net: enable configurable tx queue size")
Cc: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Ben Warren
83f3c70919 tests: Add unit tests for the VM Generation ID feature
The following tests are implemented:
* test that a GUID passed in by command line is propagated to the guest.
  Read the GUID from guest memory
* test that the "auto" argument to the GUID generates a valid GUID, as
  seen by the guest.
* test that a GUID passed in can be queried from the monitor

  This patch is loosely based on a previous patch from:
  Gal Hammer <ghammer@redhat.com>  and Igor Mammedov <imammedo@redhat.com>

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Maxime Coquelin
b9ec9bd468 vhost-user: unregister slave req handler at cleanup time
If the backend sends a request just before closing the socket,
the aio dispatcher might schedule its reading after the vhost
device has been cleaned, leading to a NULL pointer dereference
in slave_read();

vhost_user_cleanup() already closes the socket but it is not
enough, the handler has to be unregistered.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Maxime Coquelin
384b557da1 vhost: ensure vhost_ops are set before calling iotlb callback
This patch fixes a crash that happens when vhost-user iommu
support is enabled and vhost-user socket is closed.

When it happens, if an IOTLB invalidation notification is sent
by the IOMMU, vhost_ops's NULL pointer is dereferenced.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Peter Xu
552a1e01a4 intel_iommu: fix migration breakage on mr switch
Migration is broken after the vfio integration work:

qemu-kvm: AHCI: Failed to start FIS receive engine: bad FIS receive buffer address
qemu-kvm: Failed to load ich9_ahci:ahci
qemu-kvm: error while loading state for instance 0x0 of device '0000:00:1f.2/ich9_ahci'
qemu-kvm: load of migration failed: Operation not permitted

The problem is that vfio work introduced dynamic memory region
switching (actually it is also used for future PT mode), and this memory
region layout is not properly delivered to destination when migration
happens. Solution is to rebuild the layout in post_load.

Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1459906
Fixes: 558e0024 ("intel_iommu: allow dynamic switch of IOMMU region")
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Aleksandr Bezzubikov
4d7e7f2702 hw/acpi: remove dead acpi code
Signed-off-by: Aleksandr Bezzubikov <zuban32s@gmail.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mark Cave-Ayland
3c1aa733d9 fw_cfg: move setting of FW_CFG_VERSION_DMA bit to fw_cfg_init1()
The setting of the FW_CFG_VERSION_DMA bit is the same across both the
TYPE_FW_CFG_MEM and TYPE_FW_CFG_IO devices, so unify the logic in
fw_cfg_init1().

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Gabriel Somlo <somlo@cmu.edu>
2017-07-03 22:29:49 +03:00
Mark Cave-Ayland
91685323b1 fw_cfg: don't map the fw_cfg IO ports in fw_cfg_io_realize()
As indicated by Laszlo it is a QOM bug for the realize() method to actually
map the device. Set up the IO regions within fw_cfg_io_realize() and defer
the mapping with sysbus_add_io() to the caller, as already done in
fw_cfg_init_mem_wide().

This makes the iobase and dma_iobase properties now obsolete so they can be
removed.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Gabriel Somlo <somlo@cmu.edu>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
c0e9067902 i386/kvm/pci-assign: Use errp directly rather than local_err
In assigned_device_pci_cap_init(), first, error messages are filled
to a local_err variable, then through error_propagate() pass to
the parameter of errp. It leads to cumbersome code. In order to
avoid the extra local_err and error_propagate(), drop it and use
errp instead.

Cc: pbonzini@redhat.com
Cc: rth@twiddle.net
Cc: ehabkost@redhat.com
Cc: mst@redhat.com
Cc: armbru@redhat.com
Cc: marcel@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
6b728b3116 i386/kvm/pci-assign: Fix return type of verify_irqchip_kernel()
When a function has no success value to transmit, it is usually made to
return void. That has turned out not to be a success, because
it means that the extra local_err variable and error_propagate() will
be needed. It leads to cumbersome code; therefore, transmitting success/
failure in the return value is worthwhile. So fix the return type to avoid
it.

Cc: pbonzini@redhat.com
Cc: rth@twiddle.net
Cc: ehabkost@redhat.com
Cc: mst@redhat.com
Cc: armbru@redhat.com
Cc: marcel@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
344475e77d pci: Convert shpc_init() to Error
In order to propagate error messages better, convert shpc_init() to
Error, and also convert pci_bridge_dev_initfn() to realize.

Cc: mst@redhat.com
Cc: marcel@redhat.com
Cc: armbru@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
f8cd1b0201 pci: Convert to realize
Convert i82801b11, io3130_upstream, io3130_downstream and
pcie_root_port devices to realize.

Cc: mst@redhat.com
Cc: marcel@redhat.com
Cc: armbru@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
2784127857 pci: Replace pci_add_capability2() with pci_add_capability()
After the patch 'Make errp the last parameter of pci_add_capability()',
pci_add_capability() and pci_add_capability2() now do exactly the same.
So drop the wrapper pci_add_capability() of pci_add_capability2(), then
replace the pci_add_capability2() with pci_add_capability() everywhere.

Cc: pbonzini@redhat.com
Cc: rth@twiddle.net
Cc: ehabkost@redhat.com
Cc: mst@redhat.com
Cc: dmitry@daynix.com
Cc: jasowang@redhat.com
Cc: marcel@redhat.com
Cc: alex.williamson@redhat.com
Cc: armbru@redhat.com
Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
9a7c2a5970 pci: Make errp the last parameter of pci_add_capability()
Add an Error argument to pci_add_capability() to leverage errp
for passing info on errors. This helps its callers do better
error handling when moving to 'realize'.

Cc: pbonzini@redhat.com
Cc: rth@twiddle.net
Cc: ehabkost@redhat.com
Cc: mst@redhat.com
Cc: jasowang@redhat.com
Cc: marcel@redhat.com
Cc: alex.williamson@redhat.com
Cc: armbru@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
9a815774bb pci: Fix the wrong assertion.
pci_add_capability returns a strictly positive value on success,
correct asserts.

Cc: dmitry@daynix.com
Cc: jasowang@redhat.com
Cc: kraxel@redhat.com
Cc: alex.williamson@redhat.com
Cc: armbru@redhat.com
Cc: marcel@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:49 +03:00
Mao Zhongyi
eacbc63211 pci: Add comment for pci_add_capability2()
Comments for pci_add_capability2() to explain the return
value. This may help to make a correct return value check
for its callers.

Cc: mst@redhat.com
Cc: marcel@redhat.com
Cc: armbru@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:48 +03:00
Mao Zhongyi
673b0d7ccc pci: Clean up error checking in pci_add_capability()
On success, pci_add_capability2() returns a positive value. On
failure, it sets an error and returns a negative value.

pci_add_capability() laboriously checks this behavior. No other
caller does. Drop the checks from pci_add_capability().

Cc: mst@redhat.com
Cc: marcel@redhat.com
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:48 +03:00
Ladi Prosek
8991c460be intel_iommu: relax iq tail check on VTD_GCMD_QIE enable
The VT-d spec (section 6.5.2) prescribes software to zero the
Invalidation Queue Tail Register before enabling the VTD_GCMD_QIE
Global Command Register bit. Windows Server 2012 R2 and possibly
other older Windows versions violate the protocol and set a
non-zero queue tail first, which in effect makes them crash early
on boot with -device intel-iommu,intremap=on.

This commit relaxes the check and instead of failing to enable
VTD_GCMD_QIE with vtd_err_qi_enable, it behaves as if the tail
register was set just after enabling VTD_GCMD_QIE
(see vtd_handle_iqt_write).

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:48 +03:00
Thomas Huth
ba94971354 hw/pci-bridge/dec: Classify the DEC PCI bridge as bridge device
This way the bridge shows up in the correct section of the
"-device help" text.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2017-07-03 22:29:48 +03:00
Wei Wang
9b02e1618c virtio-net: enable configurable tx queue size
This patch enables the virtio-net tx queue size to be configurable
between 256 (the default queue size) and 1024 by the user when the
vhost-user backend is used.

Currently, the maximum tx queue size for other backends is 512 due
to the following limitations:
- QEMU backend: the QEMU backend implementation in some cases may
send 1024+1 iovs to writev.
- Vhost_net backend: there are possibilities that the guest sends
a vring_desc of memory which crosses a MemoryRegion thereby
generating more than 1024 iovs after translation from guest-physical
address in the backend.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-03 22:29:48 +03:00
Peter Maydell
fd479c60f5 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170603' into staging
Queued TCG patches

# gpg: Signature made Fri 30 Jun 2017 20:03:53 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170603:
  tcg: consistently access cpu->tb_jmp_cache atomically
  gen-icount: use tcg_ctx.tcg_env instead of cpu_env
  gen-icount: add missing inline to gen_tb_end

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-07-03 09:54:32 +01:00
Emilio G. Cota
f3ced3c592 tcg: consistently access cpu->tb_jmp_cache atomically
Some code paths can lead to atomic accesses racing with memset()
on cpu->tb_jmp_cache, which can result in torn reads/writes
and is undefined behaviour in C11.

These torn accesses are unlikely to show up as bugs, but from code
inspection they seem possible. For example, tb_phys_invalidate does:
    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    CPU_FOREACH(cpu) {
        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
            atomic_set(&cpu->tb_jmp_cache[h], NULL);
        }
    }
Here atomic_set might race with a concurrent memset (such as the
ones scheduled via "unsafe" async work, e.g. tlb_flush_page) and
therefore we might end up with a torn pointer (or who knows what,
because we are under undefined behaviour).

This patch converts parallel accesses to cpu->tb_jmp_cache to use
atomic primitives, thereby bringing these accesses back to defined
behaviour. The price to pay is to potentially execute more instructions
when clearing cpu->tb_jmp_cache, but given how infrequently they happen
and the small size of the cache, the performance impact I have measured
is within noise range when booting debian-arm.

Note that under "safe async" work (e.g. do_tb_flush) we could use memset
because no other vcpus are running. However I'm keeping these accesses
atomic as well to keep things simple and to avoid confusing analysis
tools such as ThreadSanitizer.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1497486973-25845-1-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-30 11:40:59 -07:00
Emilio G. Cota
53f6672bcf gen-icount: use tcg_ctx.tcg_env instead of cpu_env
We are relying on cpu_env being defined as a global, yet most
targets (i.e. all but arm/a64) have it defined as a local variable.
Luckily all of them use the same "cpu_env" name, but really
compilation shouldn't break if the name of that local variable
changed.

Fix it by using tcg_ctx.tcg_env, which all targets set in their
translate_init function. This change also helps paving the way
for the upcoming "translation loop common to all targets" work.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1497639397-19453-3-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-30 11:40:59 -07:00
Emilio G. Cota
ae06cb46b2 gen-icount: add missing inline to gen_tb_end
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1497639397-19453-2-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-30 11:40:59 -07:00
Peter Maydell
82d76dc7fc Merge remote-tracking branch 'remotes/famz/tags/block-pull-request' into staging
# gpg: Signature made Fri 30 Jun 2017 15:08:45 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/block-pull-request:
  block: Exploit BDRV_BLOCK_EOF for larger zero blocks
  block: Add BDRV_BLOCK_EOF to bdrv_get_block_status()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-30 16:29:51 +01:00
Peter Maydell
6db174aed1 Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.10-pull-request' into staging
# gpg: Signature made Fri 30 Jun 2017 13:30:44 BST
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier/tags/m68k-for-2.10-pull-request:
  target/m68k: add fmovem
  target/m68k: add explicit single and double precision operations (part 2)
  target/m68k: add fsglmul and fsgldiv
  softfloat: define floatx80_round()
  target/m68k: add explicit single and double precision operations
  target/m68k: add fmovecr
  target/m68k: add fscc.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-30 14:59:01 +01:00
Eric Blake
c61e684e44 block: Exploit BDRV_BLOCK_EOF for larger zero blocks
When we have a BDS with unallocated clusters, but asking the status
of its underlying bs->file or backing layer encounters an end-of-file
condition, we know that the rest of the unallocated area will read as
zeroes.  However, pre-patch, this required two separate calls to
bdrv_get_block_status(), as the first call stops at the point where
the underlying file ends.  Thanks to BDRV_BLOCK_EOF, we can now widen
the results of the primary status if the secondary status already
includes BDRV_BLOCK_ZERO.

In turn, this fixes a TODO mentioned in iotest 154, where we can now
see that all sectors in a partial cluster at the end of a file read
as zero when coupling the shorter backing file's status along with our
knowledge that the remaining sectors came from an unallocated cluster.

Also, note that the loop in bdrv_co_get_block_status_above() had an
inefficient exit: in cases where the active layer sets BDRV_BLOCK_ZERO
but does NOT set BDRV_BLOCK_ALLOCATED (namely, where we know we read
zeroes merely because our unallocated clusters lie beyond the backing
file's shorter length), we still ended up probing the backing layer
even though we already had a good answer.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170505021500.19315-3-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-30 21:48:06 +08:00
Eric Blake
fb0d8654ff block: Add BDRV_BLOCK_EOF to bdrv_get_block_status()
Just as the block layer already sets BDRV_BLOCK_ALLOCATED as a
shortcut for subsequent operations, there are also some optimizations
that are made easier if we can quickly tell that *pnum will advance
us to the end of a file, via a new BDRV_BLOCK_EOF which gets set
by the block layer.

This just plumbs up the new bit; subsequent patches will make use
of it.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170505021500.19315-2-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-30 21:48:06 +08:00
Peter Maydell
0912d0f2c7 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Fri 30 Jun 2017 12:46:17 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  virtio-pci: use ioeventfd even when KVM is disabled
  tests: fix virtio-net-test ISR dependence
  tests: fix virtio-blk-test ISR dependence
  tests: fix virtio-scsi-test ISR dependence
  libqos: add virtio used ring support
  libqos: fix typo in virtio.h QVirtQueue->used comment
  virtio-blk: trace vdev so devices can be distinguished

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-30 13:26:41 +01:00
Peter Maydell
36f87b4513 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170630' into staging
ppc patch queue 2017-06-30

  * More DRC cleanups, these now actually fix a few bugs
  * Properly implements the openpic timers (they now count and
    generate interrupts)
  * Fixes for XICS migration
  * Fixes for migration of POWER9 RPT guests
  * The last of the compatibility mode rework

# gpg: Signature made Fri 30 Jun 2017 10:52:25 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.10-20170630: (21 commits)
  spapr: Clean up DRC set_isolation_state() path
  spapr: Clean up DRC set_allocation_state path
  spapr: Make DRC reset force DRC into known state
  spapr: Split DRC release from DRC detach
  spapr: Eliminate DRC 'signalled' state variable
  spapr: Start hotplugged PCI devices in ISOLATED state
  target-ppc: Enable open-pic timers to count and generate interrupts
  hw/ppc/spapr.c: consecutive 'spapr->patb_entry = 0' statements
  spapr: prevent QEMU crash when CPU realization fails
  target/ppc: Proper cleanup when ppc_cpu_realizefn fails
  spapr: fix migration of ICPState objects from/to older QEMU
  xics: directly register ICPState objects to vmstate
  target/ppc: Fix return value in tcg radix mmu fault handler
  target/ppc/excp_helper: Take BQL before calling cpu_interrupt()
  spapr: Fix migration of Radix guests
  spapr: Add a "no HPT" encoding to HTAB migration stream
  ppc: Rework CPU compatibility testing across migration
  pseries: Reset CPU compatibility mode
  pseries: Move CPU compatibility property to machine
  qapi: add explicit null to string input and output visitors
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-30 11:58:49 +01:00
Stefan Hajnoczi
c324fd0a39 virtio-pci: use ioeventfd even when KVM is disabled
Old kvm.ko versions only supported a tiny number of ioeventfds so
virtio-pci avoids ioeventfds when kvm_has_many_ioeventfds() returns 0.

Do not check kvm_has_many_ioeventfds() when KVM is disabled since it
always returns 0.  Since commit 8c56c1a592
("memory: emulate ioeventfd") it has been possible to use ioeventfds in
qtest or TCG mode.

This patch makes -device virtio-blk-pci,iothread=iothread0 work even
when KVM is disabled.

I have tested that virtio-blk-pci works under TCG both with and without
iothread.

This patch fixes qemu-iotests 068, which was accidentally merged early
despite the dependency on ioeventfd.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20170628184724.21378-7-stefanha@redhat.com
Message-id: 20170615163813.7255-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-30 11:03:45 +01:00
Stefan Hajnoczi
8e11c9d365 tests: fix virtio-net-test ISR dependence
Use the new used ring APIs instead of assuming ISR being set means the
request has completed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20170628184724.21378-6-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-30 11:03:45 +01:00
Stefan Hajnoczi
12dfbdcabf tests: fix virtio-blk-test ISR dependence
Use the new used ring APIs instead of assuming ISR being set means the
request has completed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20170628184724.21378-5-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-30 11:03:45 +01:00
Stefan Hajnoczi
29509a7bbc tests: fix virtio-scsi-test ISR dependence
Use the new used ring APIs instead of assuming ISR being set means the
request has completed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20170628184724.21378-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-30 11:03:45 +01:00
Stefan Hajnoczi
e77abbe98b libqos: add virtio used ring support
Existing tests do not touch the virtqueue used ring.  Instead they poll
the virtqueue ISR register and peek into their request's device-specific
status field.

It turns out that the virtqueue ISR register can be set to 1 more than
once for a single notification (see commit
83d768b564 "virtio: set ISR on dataplane
notifications").  This causes problems for tests that assume a 1:1
correspondence between the ISR being 1 and request completion.

Peeking at device-specific status fields is also problematic if the
device has no field that can be abused for EINPROGRESS polling
semantics.  This is the case if all the field's values may be set by the
device; there's no magic constant left for polling.

It's time to process the used ring for completed requests, just like a
real virtio guest driver.  This patch adds the necessary APIs.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20170628184724.21378-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-30 11:03:45 +01:00
Stefan Hajnoczi
afbccba608 libqos: fix typo in virtio.h QVirtQueue->used comment
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20170628184724.21378-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-30 11:03:45 +01:00
David Gibson
0dfabd39d5 spapr: Clean up DRC set_isolation_state() path
There are substantial differences in the various paths through
set_isolation_state(), both for setting to ISOLATED versus UNISOLATED
state and for logical versus physical DRCs.

So, split the set_isolation_state() method into isolate() and unisolate()
methods, and give it different implementations for the two DRC types.

Factor some minimal common checks, including for valid indicator values
(which we weren't previously checking) into rtas_set_isolation_state().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-30 14:03:32 +10:00
David Gibson
617367321e spapr: Clean up DRC set_allocation_state path
The allocation-state indicator should only actually be implemented for
"logical" DRCs, not physical ones.  Factor a check for this, and also for
valid indicator state values into rtas_set_allocation_state().  Because
they don't exist for physical DRCs, there's no reason that we'd ever want
more than one method implementation, so it can just be a plain function.

In addition, the setting to USABLE and setting to UNUSABLE paths in
set_allocation_state() don't actually have much in common.  So, split the
method into separate functions for each parameter value (drc_set_usable()
and drc_set_unusable()).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-30 14:03:32 +10:00
David Gibson
4f9242fc93 spapr: Make DRC reset force DRC into known state
The reset handler for DRCs attempts several state transitions which are
subject to various checks and restrictions.  But at reset time we know
there is no guest, so we can ignore most of the usual sequencing rules and
just set the DRC back to a known state.  In fact, it's safer to do so.

The existing code also has several redundant checks for
drc->awaiting_release inside a block which has already tested that.  This
patch removes those and sets the DRC to a fixed initial state based only
on whether a device is currently plugged or not.

With DRCs correctly reset to a state based on device presence, we don't
need to force state transitions as cold plugged devices are processed.
This allows us to remove all the callers of the set_*_state() methods from
outside spapr_drc.c.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-30 14:03:32 +10:00
David Gibson
9c914e5370 spapr: Split DRC release from DRC detach
spapr_drc_detach() is called when qemu generic code requests a device be
unplugged.  It makes a number of tests, which could well delay further
action until later, before actually detaching the device from the DRC.

This splits out the part which actually removes the device from the DRC
into spapr_drc_release().  This will be useful for further cleanups.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-30 14:03:32 +10:00
David Gibson
307b7715d0 spapr: Eliminate DRC 'signalled' state variable
The 'signalled' field in the DRC appears to be entirely a tortuous
workaround for the fact that PCI devices were started in UNISOLATED state
for unclear reasons.

1) 'signalled' is already meaningless for logical (so far, all non PCI)
DRCs.  It's always set to true (at least at any point it might be tested),
and can't be assigned any real meaning due to the way signalling works for
logical DRCs.

2) For PCI DRCs, the only time signalled would be false is when non-zero
functions of a multifunction device are hotplugged, followed by function
zero (the other way around is explicitly not permitted). In that case the
secondary function DRCs are attached, but the notification isn't sent to
the guest until function 0 is plugged.

3) signalled being false is used to allow a DRC detach to switch mode
back to ISOLATED state, which allows a secondary function to be hotplugged
then unplugged with function 0 never inserted.  Without this a secondary
function starting in UNISOLATED state couldn't be detached again without
function 0 being inserted, all the functions configured by the guest, then
sent back to ISOLATED state.

4) But now that PCI DRCs start in ISOLATED state, there's nothing to be
done.  If the guest doesn't get the notification, it won't switch the
device to UNISOLATED state, so nothing prevents it from being unplugged.
If the guest does move it to UNISOLATED state without the signal (due to
a manual drmgr call, for instance) then it really isn't safe to unplug it.

So, this patch removes the signalled variable and all code related to it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-30 14:03:32 +10:00
David Gibson
af8ad96bd0 spapr: Start hotplugged PCI devices in ISOLATED state
PCI DRCs, and only PCI DRCs, are immediately moved to UNISOLATED isolation
state once the device is attached.  This has been there from the initial
implementation, and it's not clear why.

The state diagram in PAPR 13.4 suggests PCI devices should start in
ISOLATED state until the guest moves them into UNISOLATED, and the code in
the guest-side drmgr tool seems to work that way too.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-06-30 14:03:32 +10:00
Aaron Larson
ddd5140b1a target-ppc: Enable open-pic timers to count and generate interrupts
Previously QEMU open-pic implemented the 4 open-pic timers including
all timer registers, but the timers did not "count" or generate any
interrupts.  The patch makes the timers both count and generate
interrupts.  The timer clock frequency is fixed at 25MHZ.

--

Responding to V2 patch comments.
- Simplify clock frequency logic and commentary.
- Remove camelCase variables.
- Timer objects now created at init rather than lazily.

Signed-off-by: Aaron Larson <alarson@ddci.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:32 +10:00
Daniel Henrique Barboza
aca8bf9f1c hw/ppc/spapr.c: consecutive 'spapr->patb_entry = 0' statements
In ppc_spapr_reset(), if the guest is using HPT, the code was executing:

    } else {
        spapr->patb_entry = 0;
        spapr_setup_hpt_and_vrma(spapr);
    }

And, at the end of spapr_setup_hpt_and_vrma:

    /* We're setting up a hash table, so that means we're not radix */
    spapr->patb_entry = 0;

Resulting in spapr->patb_entry being assigned to 0 twice in a row.

Given that 'spapr_setup_hpt_and_vrma' is also called inside
'spapr_check_setup_free_hpt' of spapr_hcall.c, this trivial patch removes
the 'patb_entry = 0' assignment from the 'else' clause inside ppc_spapr_reset
to avoid this behavior.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Bharata B Rao
6595ab3158 spapr: prevent QEMU crash when CPU realization fails
ICPState objects were being allocated before CPU thread realization.
However commit 9ed656631d (xics: setup cpu at realize time) reversed it
by allocating ICPState objects after CPU thread is realized. But it
didn't take care to fix the error path because of which we observe
a SIGSEGV when CPU thread realization fails during cold/hotplug.

Fix this by ensuring that we do object_unparent() of ICPState object
only in case when it was created earlier.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Bharata B Rao
fd35656368 target/ppc: Proper cleanup when ppc_cpu_realizefn fails
If ppc_cpu_realizefn() fails after cpu_exec_realizefn() has been
called, we will have to undo whatever cpu_exec_realizefn() did
by explicitly calling cpu_exec_unrealizefn() which is currently
missing. Failure to do this proper cleanup will result in CPU
which was never fully realized to linger on the cpus list causing
SIGSEGV later (for eg when running "info cpus").

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Greg Kurz
46f7afa370 spapr: fix migration of ICPState objects from/to older QEMU
Commit 5bc8d26de2 ("spapr: allocate the ICPState object from under
sPAPRCPUCore") moved ICPState objects from the machine to CPU cores.
This is an improvement since we no longer allocate ICPState objects
that will never be used. But it has the side-effect of breaking
migration of older machine types from older QEMU versions.

This patch allows spapr to register dummy "icp/server" entries to vmstate.
These entries use a dedicated VMStateDescription that can swallow and
discard state of an incoming migration stream, and that don't send anything
on outgoing migration.

As for real ICPState objects, the instance_id is the cpu_index of the
corresponding vCPU, which happens to be equal to the generated instance_id
of older machine types.

The machine can unregister/register these entries when CPUs are dynamically
plugged/unplugged.

This is only available for pseries-2.9 and older machines, thanks to a
compat property.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Greg Kurz
c95f6161de xics: directly register ICPState objects to vmstate
The ICPState objects are currently registered to vmstate as qdev objects.
Their instance ids are hence computed automatically in the migration code,
and thus depends on the order the CPU cores were plugged.

If the destination had its CPU cores plugged in a different order than the
source, then ICPState objects will have different instance_ids and load
the wrong state.

Since CPU objects have a reliable cpu_index which is already used as
instance_id in vmstate, let's use it for ICPState as well.

Please note that this doesn't break migration. Older machine types used to
allocate and realize all ICPState objects at machine init time, for the whole
lifetime of the machine. The qdev instance ids are thus 0,1,2... nr_servers
and happen to map to the vCPU indexes.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Suraj Jitindar Singh
35068bd15e target/ppc: Fix return value in tcg radix mmu fault handler
The mmu fault handler should return 0 if it was able to successfully
handle the fault and a positive value otherwise.

Currently the tcg radix mmu fault handler will return 1 after
successfully handling a fault in virtual mode. This is incorrect
so fix it so that it returns 0 in this case.

The handler already correctly returns 0 when a fault was handled
in real mode and 1 if an interrupt was generated.

Fixes: d5fee0bbe6 ("target/ppc: Implement ISA V3.00 radix page fault handler")

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Thomas Huth
f1c29ebc51 target/ppc/excp_helper: Take BQL before calling cpu_interrupt()
Since the introduction of MTTCG, using the msgsnd instruction
abort()s if being called without holding the BQL. So let's protect
that part of the code now with qemu_mutex_lock_iothread().

Buglink: https://bugs.launchpad.net/qemu/+bug/1694998
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Bharata B Rao
d39c90f5f3 spapr: Fix migration of Radix guests
Fix migration of radix guests by ensuring that we issue
KVM_PPC_CONFIGURE_V3_MMU for radix case post migration.

Reported-by: Nageswara R Sastry <rnsastry@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Bharata B Rao
3a38429748 spapr: Add a "no HPT" encoding to HTAB migration stream
Add a "no HPT" encoding (using value -1) to the HTAB migration
stream (in the place of HPT size) when the guest doesn't allocate HPT.
This will help the target side to match target HPT with the source HPT
and thus enable successful migration.

Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
David Gibson
d5fc133eed ppc: Rework CPU compatibility testing across migration
Migrating between different CPU versions is a bit complicated for ppc.
A long time ago, we ensured identical CPU versions at either end by
checking the PVR had the same value.  However, this breaks under KVM
HV, because we always have to use the host's PVR - it's not
virtualized.  That would mean we couldn't migrate between hosts with
different PVRs, even if the CPUs are close enough to compatible in
practice (sometimes identical cores with different surrounding logic
have different PVRs, so this happens in practice quite often).

So, we removed the PVR check, but instead checked that several flags
indicating supported instructions matched.  This turns out to be a bad
idea, because those instruction masks are not architected information, but
essentially a TCG implementation detail.  So changes to qemu internal CPU
modelling can break migration - this happened between qemu-2.6 and
qemu-2.7.  That was addressed by 146c11f1 "target-ppc: Allow eventual
removal of old migration mistakes".

Now, verification of CPU compatibility across a migration basically doesn't
happen.  We simply ignore the PVR of the incoming migration, and hope the
cpu on the destination is close enough to work.

Now that we've cleaned up handling of processor compatibility modes
for pseries machine type, we can do better.  For new machine types
(pseries-2.10+) We allow migration if:

    * The source and destination PVRs are for the same type of CPU, as
      determined by CPU class's pvr_match function
OR  * When the source was in a compatibility mode, and the destination CPU
      supports the same compatibility mode

For older machine types we retain the existing behaviour - current CAS
code will usually set a compat mode which would break backwards
migration if we made them use the new behaviour. [Fixed from an
earlier version by Greg Kurz].

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Tested-by: Andrea Bolognani <abologna@redhat.com>
2017-06-30 14:03:31 +10:00
David Gibson
66d5c492dd pseries: Reset CPU compatibility mode
Currently, the CPU compatibility mode is set when the cpu is initialized,
then again when the guest negotiates features.  This means if a guest
negotiates a compatibility mode, then reboots, that compatibility mode
will be retained across the reset.

Usually that will get overridden when features are negotiated on the next
boot, but it's still not really correct.  This patch moves the initial set
up of the compatibility mode from cpu init to reset time.  The mode *is*
retained if the reboot was caused by the feature negotiation (it might
be important in that case, though it's unlikely).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Tested-by: Andrea Bolognani <abologna@redhat.com>
2017-06-30 14:03:31 +10:00
David Gibson
7843c0d60d pseries: Move CPU compatibility property to machine
Server class POWER CPUs have a "compat" property, which is used to set the
backwards compatibility mode for the processor.  However, this only makes
sense for machine types which don't give the guest access to hypervisor
privilege - otherwise the compatibility level is under the guest's control.

To reflect this, this removes the CPU 'compat' property and instead
creates a 'max-cpu-compat' property on the pseries machine.  Strictly
speaking this breaks compatibility, but AFAIK the 'compat' option was
never (directly) used with -device or device_add.

The option was used with -cpu.  So, to maintain compatibility, this
patch adds a hack to the cpu option parsing to strip out any compat
options supplied with -cpu and set them on the machine property
instead of the now deprecated cpu property.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Tested-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Tested-by: Andrea Bolognani <abologna@redhat.com>
2017-06-30 14:03:31 +10:00
Greg Kurz
a733371214 qapi: add explicit null to string input and output visitors
This may be used for deprecated object properties that are kept for
backwards compatibility.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Tested-by: Andrea Bolognani <abologna@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Thomas Huth
6d034b7bf8 hw/ppc/prep: Remove superfluous call to soundhw_init()
When using the 40p machine, soundhw_init() is currently called twice,
one time from vl.c and one time from ibm_40p_init(). The call in
ibm_40p_init() was likely just a copy-and-paste from an old version
of the prep machine - but there the call to audio_init() (which was
the previous name of this function) has been removed many years ago
already, with commit b3e6d591b0
("audio: enable PCI audio cards for all PCI-enabled targets"), so
we certainly also do not need the soundhw_init() in the 40p function
anymore nowadays.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Sahid Ferdjaoui <sferdjao@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-30 14:03:31 +10:00
Laurent Vivier
a1e58ddcb3 target/m68k: add fmovem
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170628204241.32106-8-laurent@vivier.eu>
2017-06-29 20:29:57 +02:00
Laurent Vivier
77bdb22924 target/m68k: add explicit single and double precision operations (part 2)
Add fsabs, fdabs, fsneg, fdneg, fsmove and fdmove.

The value is converted using the new floatx80_round() function.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170628204241.32106-7-laurent@vivier.eu>
2017-06-29 20:29:00 +02:00
Laurent Vivier
2f77995ceb target/m68k: add fsglmul and fsgldiv
fsglmul and fsgldiv truncate data to single precision before computing
results.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170628204241.32106-6-laurent@vivier.eu>
2017-06-29 20:28:22 +02:00
Laurent Vivier
0f72129281 softfloat: define floatx80_round()
Add a function to round a floatx80 to the defined precision
(floatx80_rounding_precision)

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170628204241.32106-5-laurent@vivier.eu>
2017-06-29 20:27:39 +02:00
Laurent Vivier
a51b6bc38b target/m68k: add explicit single and double precision operations
Add fssqrt, fdsqrt, fsadd, fdadd, fssub, fdsub, fsmul, fdmul,
fsdiv, fddiv.

The precision is managed using set_floatx80_rounding_precision().

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170628204241.32106-4-laurent@vivier.eu>
2017-06-29 20:26:56 +02:00
Laurent Vivier
9d403660d9 target/m68k: add fmovecr
fmovecr moves a floating point constant from the
FPU ROM to a floating point register.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170628204241.32106-3-laurent@vivier.eu>
2017-06-29 20:26:01 +02:00
Laurent Vivier
dd337bf862 target/m68k: add fscc.
use DisasCompare with FPU conditions in fscc and fbcc.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170628204241.32106-2-laurent@vivier.eu>
2017-06-29 20:25:17 +02:00
Peter Maydell
c5eb5846d2 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-hmp-20170629' into staging
HMP pull 2017-06-29

# gpg: Signature made Thu 29 Jun 2017 17:27:55 BST
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-hmp-20170629:
  Add chardev-send-break monitor command
  monitor: Add -a (all) option to info registers

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-29 17:37:11 +01:00
Stefan Fritsch
bd1d5ad9f9 Add chardev-send-break monitor command
Sending a break on a serial console can be useful for debugging the
guest. But not all chardev backends support sending breaks (only telnet
and mux do). The chardev-send-break command allows to send a break even
if using other backends.

Signed-off-by: Stefan Fritsch <sf@sfritsch.de>
Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170611074817.13621-1-sf@sfritsch.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  Use 'send a break' in all 3 pieces of text as suggested by eblake
2017-06-29 17:14:11 +01:00
Suraj Jitindar Singh
18f0828278 monitor: Add -a (all) option to info registers
The info registers command in the qemu monitor is used to dump register
values.

Currently this command uses the monitor cpu (which can be set by the
user) as the cpu for whose registers will be dumped. Sometimes it is
useful to see the registers for all cpus and currently this requires
setting the monitor cpu and then re-running the command for each cpu
in the system. It would be nice if there was an easier way to do this.

Add the "-a" option to the info registers command to dump the register
values for all cpus.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>

Message-Id: <20170608054116.17203-1-sjitindarsingh@gmail.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-29 17:14:11 +01:00
Peter Maydell
454d7dc9bc Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging
- fixes a minor bug that could possibly prevent old guests from removing
  directories
- makes default permissions for new files configurable from the cmdline
  when using mapped security modes
- handle transport errors
- g_malloc()+memcpy() converted to g_memdup()

# gpg: Signature made Thu 29 Jun 2017 14:12:42 BST
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/for-upstream:
  9pfs: handle transport errors in pdu_complete()
  xen-9pfs: disconnect if buffers are misconfigured
  virtio-9p: break device if buffers are misconfigured
  virtio-9p: message header is 7-byte long
  virtio-9p: record element after sanity checks
  9pfs: replace g_malloc()+memcpy() with g_memdup()
  9pfs: local: Add support for custom fmode/dmode in 9ps mapped security modes
  9pfs: local: remove: use correct path component

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-29 16:21:45 +01:00
John Arbuckle
e720624906 ui/cocoa.m: Fix compatibility issue with Mac OS 10.9 and under
The [NSEvent modifierFlags] method returns an NSEventModifierFlags type value in Mac OS 10.10. It used to be of type NSUInteger. Replacing NSEventModifierFlags with NSUInteger allows the cocoa.m file to be compiled on older versions of Mac OS. This patch has been tested on Mac OS 10.6 and Mac OS 10.12 without problems.

Signed-off-by: John Arbuckle <programmingkidx@gmail.com>
Message-id: F6C36C1A-4661-48F4-BEA6-3994889927D0@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-29 15:07:16 +01:00
Stefan Hajnoczi
a576ceac39 virtio-blk: trace vdev so devices can be distinguished
It is hard to analyze trace logs with multiple virtio-blk devices
because none of the trace events include the VirtIODevice *vdev.

This patch adds vdev so it's clear which device a request is associated
with.

I considered using VirtIOBlock *s instead but VirtIODevice *vdev is more
general and may be correlated with generic virtio trace events like
virtio_set_status.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170614092930.11234-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-29 14:31:16 +01:00
Greg Kurz
06a37db7b1 9pfs: handle transport errors in pdu_complete()
Contrary to what is written in the comment, a buggy guest can misconfigure
the transport buffers and pdu_marshal() may return an error.  If this ever
happens, it is up to the transport layer to handle the situation (9P is
transport agnostic).

This fixes Coverity issue CID1348518.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-06-29 15:11:51 +02:00
Stefano Stabellini
e08d1e11ed xen-9pfs: disconnect if buffers are misconfigured
Implement xen_9pfs_disconnect by unbinding the event channels. On
xen_9pfs_free, call disconnect if any event channels haven't been
disconnected.

If the frontend misconfigured the buffers set the backend to "Closing"
and disconnect it. Misconfigurations include requesting a read of more
bytes than available on the ring buffer, or claiming to be writing more
data than available on the ring buffer.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-06-29 15:11:51 +02:00
Greg Kurz
8d37de41ca virtio-9p: break device if buffers are misconfigured
The 9P protocol is transport agnostic: if the guest misconfigured the
buffers, the best we can do is to set the broken flag on the device.

Signed-off-by: Greg Kurz <groug@kaod.org>
2017-06-29 15:11:51 +02:00
Greg Kurz
a4d9985450 virtio-9p: message header is 7-byte long
The 9p spec at http://man.cat-v.org/plan_9/5/intro reads:

 "Each 9P message begins with a four-byte size field specify-
  ing the length in bytes of the complete message including
  the four bytes of the size field itself.  The next byte is
  the message type, one of the constants in the enumeration in
  the include file <fcall.h>.  The next two bytes are an iden-
  tifying tag, described below."

ie, each message starts with a 7-byte long header.

The core 9P code already assumes this pretty much everywhere. This patch
does the following:
- makes the assumption explicit in the common 9p.h header, since it isn't
  related to the transport
- open codes the header size in handle_9p_output() and hardens the sanity
  check on the space needed for the reply message

Signed-off-by: Greg Kurz <groug@kaod.org>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
2017-06-29 15:11:50 +02:00
Greg Kurz
3a21fb2af0 virtio-9p: record element after sanity checks
If the guest sends a malformed request, we end up with a dangling pointer
in V9fsVirtioState. This doesn't seem to cause any bug, but let's remove
this side effect anyway.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-29 15:11:50 +02:00
Marc-André Lureau
453a1b234f 9pfs: replace g_malloc()+memcpy() with g_memdup()
I found these patterns via grepping the source tree. I don't have a
coccinelle script for it!

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-06-29 15:11:50 +02:00
Tobias Schramm
b96feb2cb9 9pfs: local: Add support for custom fmode/dmode in 9ps mapped security modes
In mapped security modes, files are created with very restrictive
permissions (600 for files and 700 for directories). This makes
file sharing between virtual machines and users on the host rather
complicated. Imagine eg. a group of users that need to access data
produced by processes on a virtual machine. Giving those users access
to the data will be difficult since the group access mode is always 0.

This patch makes the default mode for both files and directories
configurable. Existing setups that don't know about the new parameters
keep using the current secure behavior.

Signed-off-by: Tobias Schramm <tobleminer@gmail.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-06-29 15:11:50 +02:00
Bruce Rogers
790db7efdb 9pfs: local: remove: use correct path component
Commit a0e640a8 introduced a path processing error.
Pass fstatat the dirpath based path component instead
of the entire path.

Signed-off-by: Bruce Rogers <brogers@suse.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-06-29 15:11:50 +02:00
Peter Maydell
4fe60423d7 Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170628' into staging
migration/next for 20170628

# gpg: Signature made Wed 28 Jun 2017 12:16:44 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170628:
  exec: fix access to ram_list.dirty_memory when sync dirty bitmap
  migration: add "return-path" capability
  vmstate: error hint for failed equal checks
  migration: add comment for TYPE_MIGRATE
  migration: hmp: dump globals
  migration: merge enforce_config_section somewhat
  migration: move skip_section_footers
  migration: move skip_configuration out
  migration: move only_migratable to MigrationState
  migration: move global_state.optional out
  migration: let MigrationState be a qdev
  vl: clean up global property registration
  accel: introduce AccelClass.global_props
  machine: export register_compat_prop()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-29 13:13:05 +01:00
Peter Maydell
4645886754 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20170627-tag' into staging
Xen 2017/06/27

# gpg: Signature made Tue 27 Jun 2017 23:02:43 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# gpg:                 aka "Stefano Stabellini <sstabellini@kernel.org>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20170627-tag:
  xen-disk: add support for multi-page shared rings
  xen-disk: only advertize feature-persistent if grant copy is not available
  xen/disk: don't leak stack data via response ring

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-29 11:45:01 +01:00
Peter Maydell
82991bed73 linux-user: Put PPC AT_IGNOREPPC auxv entries in the right place
The 32-bit PPC auxv is a bit complicated because in the
mists of time it used to be 16-aligned rather than directly
after the environment. Older glibc versions had code to
try to probe for whether it needed alignment or not:
https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c;hb=e84eabb3871c9b39e59323bf3f6b98c2ca9d1cd0
and the kernel has code which puts some magic entries at
the bottom to ensure that the alignment probe fails:
http://elixir.free-electrons.com/linux/latest/source/arch/powerpc/include/asm/elf.h#L158

QEMU has similar code too, but it was broken by commit
7c4ee5bcc8, which changed elfload.c from filling in
the auxv starting at the highest address and working down
to starting at the lowest address and working up. This
means that the ARCH_DLINFO hook must now be invoked first
rather than last, and the entries in it for PPC must
be reversed so that the magic AT_IGNOREPPC entries come
at the lowest address in the auxv as they should.

The effect of this was that if running a guest binary that
used an old glibc with the alignment probing the guest ld.so
code would segfault if the size of the guest environment and
argv happened to put the auxv at an address that triggered
the alignment code in the guest glibc.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Tested-by: Richard Henderson <rth@twiddle.net>
Message-id: 1498582198-6649-1-git-send-email-peter.maydell@linaro.org
2017-06-29 10:25:26 +01:00
Haozhong Zhang
084140bd49 exec: fix access to ram_list.dirty_memory when sync dirty bitmap
In cpu_physical_memory_sync_dirty_bitmap(rb, start, ...), the 2nd
argument 'start' is relative to the start of the ramblock 'rb'. When
it's used to access the dirty memory bitmap of ram_list (i.e.
ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]->blocks[]), an offset to
the start of all RAM (i.e. rb->offset) should be added to it, which has
however been missed since c/s 6b6712efcc. For a ramblock of host memory
backend whose offset is not zero, cpu_physical_memory_sync_dirty_bitmap()
synchronizes the incorrect part of the dirty memory bitmap of ram_list
to the per ramblock dirty bitmap. As a result, a guest with host
memory backend may crash after migration.

Fix it by adding the offset of ramblock when accessing the dirty memory
bitmap of ram_list in cpu_physical_memory_sync_dirty_bitmap().

Reported-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Message-Id: <20170628083704.24997-1-haozhong.zhang@intel.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Tested-by: Juan Quintela <quintela@redhat.com>
Tested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 12:23:58 +02:00
Peter Xu
c788ada816 migration: add "return-path" capability
When this capability is enabled, QEMU will use the return path even for
precopy migration. This is helpful at least in one case when destination
failed to load the image while source quit without confirmation. With
return path, source will wait for the last response from destination,
and if destination fails, it'll fail the migration on source, then the
guest can be run again on the source (rather than assuming to be good,
then the guest will be lost after source quits).

It needs to be enabled explicitly on source, otherwise disabled.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498472935-14461-1-git-send-email-peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:51:10 +02:00
Halil Pasic
d2164ad35c vmstate: error hint for failed equal checks
In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug,
but it's actually the best we can do. Especially in these cases a verbose
error message is required.

Let's introduce infrastructure for specifying an error hint to be used if
equal check fails. Let's do this by adding a parameter to the _EQUAL
macros called _err_hint. Also change all current users to pass NULL as
last parameter so nothing changes for them.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>

Message-Id: <20170623144823.42936-1-pasic@linux.vnet.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:44 +02:00
Peter Xu
01f6e14c78 migration: add comment for TYPE_MIGRATE
It'll be strange that the migration object inherits TYPE_DEVICE. Add
some explanations to it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498634144-26508-1-git-send-email-peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:39 +02:00
Peter Xu
9d18af93b3 migration: hmp: dump globals
Now we have some globals that can be configured for migration. Dump them
in HMP info migration for better debugging.

(we can also use this to monitor whether COMPAT fields are applied
correctly on compatible machines)

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-11-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:39 +02:00
Peter Xu
4ffdb337e7 migration: merge enforce_config_section somewhat
These two parameters:

- MachineState::enforce_config_section
- MigrationState::send_configuration

play a similar role here. This patch merges the first one into
second, then we'll have a single place to reference whether we need to
send the configuration section.

I didn't remove the MachineState.enforce_config_section field since when
applying that machine property (in machine_set_property()) we haven't
yet initialized global properties and migration object. Then, it's
still not easy to pass that boolean to MigrationState at such an early
time.

A natural benefit for current patch is that now we kept the meaning of
"enforce-config-section" since it'll still have the highest
priority (that's what "enforce" mean I guess).

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-10-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:39 +02:00
Peter Xu
15c3850325 migration: move skip_section_footers
Move it into MigrationState, invert its meaning and rename it to
send_section_footer, with a property bound to it. Same trick is played
like previous patches.

Removing savevm_skip_section_footers().

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-9-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:39 +02:00
Peter Xu
71dd4c1a56 migration: move skip_configuration out
It was in SaveState but now moved to MigrationState altogether, reverted
its meaning, then renamed to "send_configuration". Again, using
HW_COMPAT_2_3 for old PC/SPAPR machines, and accel_register_prop() for
xen_init().

Removing savevm_skip_configuration().

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-8-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:38 +02:00
Peter Xu
3df663e575 migration: move only_migratable to MigrationState
One less global variable, and it does only matter with migration.

We keep the old "--only-migratable" option, but also now we support:

  -global migration.only-migratable=true

Currently still keep the old interface.

Hmm, now vl.c has no way to access migrate_get_current(). Export a
function for it to setup only_migratable.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-7-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:38 +02:00
Peter Xu
5272298c48 migration: move global_state.optional out
Put it into MigrationState then we can use the properties to specify
whether to enable storing global state.

Removing global_state_set_optional() since now we can use HW_COMPAT_2_3
for x86/power, and AccelClass.global_props for Xen.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-6-git-send-email-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:38 +02:00
Peter Xu
e5cb7e7677 migration: let MigrationState be a qdev
Let the old man "MigrationState" join the object family. Direct benefit
is that we can start to use all the property features derived from
current QDev, like: HW_COMPAT_* bits, command line setup for migration
parameters (so will never need to set them up each time using HMP/QMP,
this is really, really attractive for test writers), etc.

I see no reason to disallow this yet. So let's start from this
one, to see whether it would be anything good.

Now we init the MigrationState struct statically in main() to make sure
it's initialized after global properties are applied, since we'll use
them during creation of the object.

No functional change at all.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-5-git-send-email-peterx@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:38 +02:00
Peter Xu
a0660e0bb8 vl: clean up global property registration
It's not that clear on how the global properties are registered to
global_props (and also its priority relationship). Let's provide a
single function to be called in main() for that, with comment to explain
it a bit.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-4-git-send-email-peterx@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:38 +02:00
Peter Xu
9ffea096b9 accel: introduce AccelClass.global_props
Introduce this new field for the accelerator classes so that each
specific accelerator in the future can register its own global
properties to be used further by the system. It works just like how the
old machine compatible properties do, but only tailored for
accelerators.

Introduce register_compat_props_array() for it. Export it so that it may
be used in other codes as well in the future.

Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-3-git-send-email-peterx@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:38 +02:00
Peter Xu
60d7cacac8 machine: export register_compat_prop()
We have HW_COMPAT_*, however that's only bound to machines, not other
things (like accelerators).  Behind it, it was register_compat_prop()
that played the trick.  Let's export the function for further use
outside HW_COMPAT_* magic.

Meanwhile, move it to qdev-properties.c, which seems more proper (since
it'll be used not only in machine codes).

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1498536619-14548-2-git-send-email-peterx@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-28 11:18:38 +02:00
Paul Durrant
3284fad728 xen-disk: add support for multi-page shared rings
The blkif protocol has had provision for negotiation of multi-page shared
rings for some time now and many guest OS have support in their frontend
drivers.

This patch makes the necessary modifications to xen-disk to support a shared
ring up to order 4 (i.e. 16 pages).

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-06-27 15:01:56 -07:00
Paul Durrant
976eba1c88 xen-disk: only advertize feature-persistent if grant copy is not available
If grant copy is available then it will always be used in preference to
persistent maps. In this case feature-persistent should not be advertized
to the frontend, otherwise it may needlessly copy data into persistently
granted buffers.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-06-27 15:01:49 -07:00
Stefano Stabellini
b0ac694fdb xen/disk: don't leak stack data via response ring
Rather than constructing a local structure instance on the stack, fill
the fields directly on the shared ring, just like other (Linux)
backends do. Build on the fact that all response structure flavors are
actually identical (aside from alignment and padding at the end).

This is XSA-216.

Reported by: Anthony Perard <anthony.perard@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
2017-06-27 14:45:34 -07:00
Peter Maydell
577caa2672 Merge remote-tracking branch 'remotes/edgar/tags/edgar/mmio-exec-v2.for-upstream' into staging
edgar/mmio-exec-v2.for-upstream

# gpg: Signature made Tue 27 Jun 2017 16:22:30 BST
# gpg:                using RSA key 0x29C596780F6BCA83
# gpg: Good signature from "Edgar E. Iglesias (Xilinx key) <edgar.iglesias@xilinx.com>"
# gpg:                 aka "Edgar E. Iglesias <edgar.iglesias@gmail.com>"
# Primary key fingerprint: AC44 FEDC 14F7 F1EB EDBF  4151 29C5 9678 0F6B CA83

* remotes/edgar/tags/edgar/mmio-exec-v2.for-upstream:
  xilinx_spips: allow mmio execution
  exec: allow to get a pointer for some mmio memory region
  introduce mmio_interface
  qdev: add MemoryRegion property
  cputlb: fix the way get_page_addr_code fills the tlb
  cputlb: move get_page_addr_code
  cputlb: cleanup get_page_addr_code to use VICTIM_TLB_HIT

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-27 16:56:55 +01:00
KONRAD Frederic
252b99baeb xilinx_spips: allow mmio execution
This allows to execute from the lqspi area.

When the request_ptr is called the device loads 1024 bytes from the SPI device.
Then this code can be executed by the guest.

Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-06-27 15:09:15 +02:00
KONRAD Frederic
c935674635 exec: allow to get a pointer for some mmio memory region
This introduces a special callback which allows to run code from some MMIO
devices.

SysBusDevice with a MemoryRegion which implements the request_ptr callback will
be notified when the guest tries to execute code from their offset. Then it will
be able to eg: pre-load some code from an SPI device or ask a pointer from an
external simulator, etc..

When the pointer or the data in it are no longer valid the device has to
invalidate it.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-06-27 15:09:15 +02:00
KONRAD Frederic
7cc2298c46 introduce mmio_interface
This introduces mmio_interface object which contains a MemoryRegion
and can be hotplugged/hotunplugged.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-06-27 15:09:15 +02:00
KONRAD Frederic
ed03d749f3 qdev: add MemoryRegion property
We need to pass a pointer to a MemoryRegion for mmio_interface.
So this just adds that.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-06-27 15:09:15 +02:00
KONRAD Frederic
71b9a45330 cputlb: fix the way get_page_addr_code fills the tlb
get_page_addr_code(..) does a cpu_ldub_code to fill the tlb:
This can lead to some side effects if a device is mapped at this address.

So this patch replaces the cpu_memory_ld by a tlb_fill.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-06-27 15:09:15 +02:00
KONRAD Frederic
f2553f0489 cputlb: move get_page_addr_code
This just moves the code before VICTIM_TLB_HIT macro definition
so we can use it.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-06-27 15:09:14 +02:00
KONRAD Frederic
3416343255 cputlb: cleanup get_page_addr_code to use VICTIM_TLB_HIT
This replaces env1 and page_index variables by env and index
so we can use VICTIM_TLB_HIT macro later.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-06-27 15:09:04 +02:00
Peter Maydell
054914f646 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Mon 26 Jun 2017 14:07:32 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (60 commits)
  qemu-img: don't shadow opts variable in img_dd()
  block: Do not strcmp() with NULL uri->scheme
  blkverify: Catch bs->exact_filename overflow
  blkdebug: Catch bs->exact_filename overflow
  fix: avoid an infinite loop or a dangling pointer problem in img_commit
  block: change variable names in BlockDriverState
  block: Remove bdrv_aio_readv/writev/flush()
  qed: Use bdrv_co_* for coroutine_fns
  qed: Add coroutine_fn to I/O path functions
  qed: Use a coroutine for need_check_timer
  qed: Simplify request handling
  qed: Use CoQueue for serialising allocations
  qed: Implement .bdrv_co_readv/writev
  qed: Remove recursion in qed_aio_next_io()
  qed: Remove ret argument from qed_aio_next_io()
  qed: Add return value to qed_aio_read/write_data()
  qed: Add return value to qed_aio_write_inplace/alloc()
  qed: Add return value to qed_aio_write_cow()
  qed: Add return value to qed_aio_write_main()
  qed: Add return value to qed_aio_write_l2_update()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-26 15:38:29 +01:00
Kevin Wolf
704e41ba78 Merge remote-tracking branch 'mreitz/tags/pull-block-2017-06-26' into queue-block
Block patches for the block queue

# gpg: Signature made Mon Jun 26 14:56:24 2017 CEST
# gpg:                using RSA key 0xF407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* mreitz/tags/pull-block-2017-06-26:
  qemu-img: don't shadow opts variable in img_dd()
  block: Do not strcmp() with NULL uri->scheme
  blkverify: Catch bs->exact_filename overflow
  blkdebug: Catch bs->exact_filename overflow
  fix: avoid an infinite loop or a dangling pointer problem in img_commit
  block: change variable names in BlockDriverState

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:57:27 +02:00
Stefan Hajnoczi
2a24570909 qemu-img: don't shadow opts variable in img_dd()
It's confusing when two different variables have the same name in one
function.

Cc: Reda Sallahi <fullmanet@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170619150002.3033-1-stefanha@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:54:46 +02:00
Max Reitz
f69165a8fe block: Do not strcmp() with NULL uri->scheme
uri_parse(...)->scheme may be NULL. In fact, probably every field may be
NULL, and the callers do test this for all of the other fields but not
for scheme (except for block/gluster.c; block/vxhs.c does not access
that field at all).

We can easily fix this by using g_strcmp0() instead of strcmp().

Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613205726.13544-1-mreitz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:54:46 +02:00
Max Reitz
05cc758a3d blkverify: Catch bs->exact_filename overflow
The bs->exact_filename field may not be sufficient to store the full
blkverify node filename. In this case, we should not generate a filename
at all instead of an unusable one.

Cc: qemu-stable@nongnu.org
Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613172006.19685-3-mreitz@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:54:46 +02:00
Max Reitz
de81d72d3d blkdebug: Catch bs->exact_filename overflow
The bs->exact_filename field may not be sufficient to store the full
blkdebug node filename. In this case, we should not generate a filename
at all instead of an unusable one.

Cc: qemu-stable@nongnu.org
Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613172006.19685-2-mreitz@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:54:46 +02:00
sochin.jiang
4172a00373 fix: avoid an infinite loop or a dangling pointer problem in img_commit
img_commit could fall into an infinite loop calling run_block_job() if
its blockjob fails on any I/O error, fix this already known problem.

Signed-off-by: sochin.jiang <sochin.jiang@huawei.com>
Message-id: 1497509253-28941-1-git-send-email-sochin.jiang@huawei.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:54:46 +02:00
Manos Pitsidianakis
f5a5ca7969 block: change variable names in BlockDriverState
Change the 'int count' parameter in *pwrite_zeros, *pdiscard related
functions (and some others) to 'int bytes', as they both refer to bytes.
This helps with code legibility.

Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Message-id: 20170609101808.13506-1-el13635@mail.ntua.gr
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:54:46 +02:00
Kevin Wolf
c5f1ad429c block: Remove bdrv_aio_readv/writev/flush()
These functions are unused now.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
0f714ec706 qed: Use bdrv_co_* for coroutine_fns
All functions that are marked coroutine_fn can directly call the
bdrv_co_* version of functions instead of going through the wrapper.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
87f0d88261 qed: Add coroutine_fn to I/O path functions
Now that we stay in coroutine context for the whole request when doing
reads or writes, we can add coroutine_fn annotations to many functions
that can do I/O or yield directly.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
c0e8f98927 qed: Use a coroutine for need_check_timer
This fixes the last place where we degraded from AIO to actual blocking
synchronous I/O requests. Putting it into a coroutine means that instead
of blocking, the coroutine simply yields while doing I/O.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
48cc565e76 qed: Simplify request handling
Now that we process a request in the same coroutine from beginning to
end and don't drop out of it any more, we can look like a proper
coroutine-based driver and simply call qed_aio_next_io() and get a
return value from it instead of spawning an additional coroutine that
reenters the parent when it's done.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
0806c3b5dd qed: Use CoQueue for serialising allocations
Now that we're running in coroutine context, the ad-hoc serialisation
code (which drops a request that has to wait out of coroutine context)
can be replaced by a CoQueue.

This means that when we resume a serialised request, it is running in
coroutine context again and its I/O isn't blocking any more.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
89f89709c7 qed: Implement .bdrv_co_readv/writev
Most of the qed code is now synchronous and matches the coroutine model.
One notable exception is the serialisation between requests which can
still schedule a callback. Before we can replace this with coroutine
locks, let's convert the driver's external interfaces to the coroutine
versions.

We need to be careful to handle both requests that call the completion
callback directly from the calling coroutine (i.e. fully synchronous
code) and requests that involve some callback, so that we need to yield
and wait for the completion callback coming from outside the coroutine.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
018598747c qed: Remove recursion in qed_aio_next_io()
Instead of calling itself recursively as the last thing, just convert
qed_aio_next_io() into a loop.

This patch is best reviewed with 'git show -w' because most of it is
just whitespace changes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:15 +02:00
Kevin Wolf
dddf8db10b qed: Remove ret argument from qed_aio_next_io()
All callers pass ret = 0, so we can just remove it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
0596be7e6a qed: Add return value to qed_aio_read/write_data()
Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
d6daddcdeb qed: Add return value to qed_aio_write_inplace/alloc()
Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
a101341aa0 qed: Add return value to qed_aio_write_cow()
Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

While refactoring qed_aio_write_alloc() to accommodate the change,
qed_aio_write_zero_cluster() ended up with a single line, so I chose to
inline that line and remove the function completely.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
eaf0bc56f5 qed: Add return value to qed_aio_write_main()
Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
88d2dd72bc qed: Add return value to qed_aio_write_l2_update()
Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
fb18de21e0 qed: Add return value to qed_aio_write_l1_update()
Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
fae25ac7bd qed: Inline qed_commit_l2_update()
qed_commit_l2_update() is unconditionally called at the end of
qed_aio_write_l1_update(). Inline it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
a4d8f1aee1 qed: Make qed_aio_write_main() synchronous
Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
3e248cdcd9 qed: Make qed_aio_read_data() synchronous
Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
453e53e2a1 qed: Remove callback from qed_write_table()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
29470d11bf qed: Remove GenericCB
The GenericCB infrastructure isn't used any more. Remove it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
602b57fba4 qed: Make qed_write_table() synchronous
Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
f13d712bb2 qed: Remove callback from qed_write_header()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
7076309aef qed: Make qed_write_header() synchronous
Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
b4ac32f34f qed: Remove callback from qed_copy_from_backing_file()
With this change, qed_aio_write_prefill() and qed_aio_write_postfill()
collapse into a single function. This is reflected by a rename of the
combined function to qed_aio_write_cow().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
0f7aa24d2c qed: Make qed_copy_from_backing_file() synchronous
Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
e85c528142 qed: Make qed_read_backing_file() synchronous
Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
0f21b7a1b7 qed: Remove callback from qed_find_cluster()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
a8165d2d66 qed: Remove callback from qed_read_l2_table()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
f6513529c6 qed: Remove callback from qed_read_table()
Instead of passing the return value to a callback, return it to the
caller so that the callback can be inlined there.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
11273076e9 qed: Make qed_read_table() synchronous
Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Kevin Wolf
3b7cd9fd8f qed: Use bottom half to resume waiting requests
The qed driver serialises allocating write requests. When the active
allocation is finished, the AIO callback is called, but after this, the
next allocating request is immediately processed instead of leaving the
coroutine. Resuming another allocation request in the same request
coroutine means that the request now runs in the wrong coroutine.

The following is one of the possible effects of this: The completed
request will generally reenter its request coroutine in a bottom half,
expecting that it completes the request in bdrv_driver_pwritev().
However, if the second request actually yielded before leaving the
coroutine, the reused request coroutine is in an entirely different
place and is reentered prematurely. Not a good idea.

Let's make sure that we exit the coroutine after completing the first
request by resuming the next allocating request only with a bottom
half.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-26 14:51:14 +02:00
Alberto Garcia
24990c5b95 qcow2: Use offset_into_cluster() and offset_to_l2_index()
We already have functions for doing these calculations, so let's use
them instead of doing everything by hand. This makes the code a bit
more readable.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
ee22a9d869 qcow2: Merge the writing of the COW regions with the guest data
If the guest tries to write data that results in the allocation of a
new cluster, instead of writing the guest data first and then the data
from the COW regions, write everything together using one single I/O
operation.

This can improve the write performance by 25% or more, depending on
several factors such as the media type, the cluster size and the I/O
request size.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
86b862c431 qcow2: Pass a QEMUIOVector to do_perform_cow_{read,write}()
Instead of passing a single buffer pointer to do_perform_cow_write(),
pass a QEMUIOVector. This will allow us to merge the write requests
for the COW regions and the actual data into a single one.

Although do_perform_cow_read() does not strictly need to change its
API, we're doing it here as well for consistency.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
b3cf1c7cf8 qcow2: Allow reading both COW regions with only one request
Reading both COW regions requires two separate requests, but it's
perfectly possible to merge them and perform only one. This generally
improves performance, particularly on rotating disk drives. The
downside is that the data in the middle region is read but discarded.

This patch takes a conservative approach and only merges reads when
the size of the middle region is <= 16KB.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
672f0f2c4b qcow2: Split do_perform_cow() into _read(), _encrypt() and _write()
This patch splits do_perform_cow() into three separate functions to
read, encrypt and write the COW regions.

perform_cow() can now read both regions first, then encrypt them and
finally write them to disk. The memory allocation is also done in
this function now, using one single buffer large enough to hold both
regions.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
99450c6fb9 qcow2: Make perform_cow() call do_perform_cow() twice
Instead of calling perform_cow() twice with a different COW region
each time, call it just once and make perform_cow() handle both
regions.

This patch simply moves code around. The next one will do the actual
reordering of the COW operations.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
e034f5bcbc qcow2: Use unsigned int for both members of Qcow2COWRegion
Qcow2COWRegion has two attributes:

- The offset of the COW region from the start of the first cluster
  touched by the I/O request. Since it's always going to be positive
  and the maximum request size is at most INT_MAX, we can use a
  regular unsigned int to store this offset.

- The size of the COW region in bytes. This is guaranteed to be >= 0,
  so we should use an unsigned type instead.

In x86_64 this reduces the size of Qcow2COWRegion from 16 to 8 bytes.
It will also help keep some assertions simpler now that we know that
there are no negative numbers.

The prototype of do_perform_cow() is also updated to reflect these
changes.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
026ac1586b qcow2: Remove unused Error variable in do_perform_cow()
We are using the return value of qcow2_encrypt_sectors() to detect
problems but we are throwing away the returned Error since we have no
way to report it to the user. Therefore we can simply get rid of the
local Error variable and pass NULL instead.

Alternatively we could try to figure out a way to pass the original
error instead of simply returning -EIO, but that would be more
invasive, so let's keep the current approach.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stephen Bates
b2b2b67a00 nvme: Add support for Read Data and Write Data in CMBs.
Add the ability for the NVMe model to support both the RDS and WDS
modes in the Controller Memory Buffer.

Although not currently supported in the upstreamed Linux kernel a fork
with support exists [1] and user-space test programs that build on
this also exist [2].

Useful for testing CMB functionality in preparation for real CMB
enabled NVMe devices (coming soon).

[1] https://github.com/sbates130272/linux-p2pmem
[2] https://github.com/sbates130272/p2pmem-test

Signed-off-by: Stephen Bates <sbates@raithlin.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
ea4f3cebc4 qemu-iotests: 068: test iothread mode
Perform the savevm/loadvm test with both iothread on and off.  This
covers the recently found savevm/loadvm hang when iothread is enabled.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
5aaf590df4 qemu-iotests: 068: use -drive/-device instead of -hda
The legacy -hda option does not support -drive/-device parameters.  They
will be required by the next patch that extends this test case.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
79645e0569 qemu-iotests: 068: extract _qemu() function
Avoid duplicating the QEMU command-line.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
1575829d2a migration: hold AioContext lock for loadvm qemu_fclose()
migration_incoming_state_destroy() uses qemu_fclose() on the vmstate
file.  Make sure to call it inside an AioContext acquire/release region.

This fixes a 'qemu: qemu_mutex_unlock: Operation not permitted' abort
in loadvm.

This patch closes the vmstate file before ending the drained region.
Previously we closed the vmstate file after ending the drained region.
The order does not matter.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Alberto Garcia
0d2fac8ede throttle: Update throttle-groups.c documentation
There used to be throttle_timers_{detach,attach}_aio_context() calls
in bdrv_set_aio_context(), but since 7ca7f0f6db
they are now in blk_set_aio_context().

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Kevin Wolf
370e8328d7 doc: Document driver-specific -blockdev options
This documents the driver-specific options for the raw, qcow2 and file
block drivers for the man page. For everything else, we refer to the
QAPI documentation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:51:13 +02:00
Kevin Wolf
dfaca4641c doc: Document generic -blockdev options
This adds documentation for the -blockdev options that apply to all
nodes independent of the block driver used.

All options that are shared by -blockdev and -drive are now explained in
the section for -blockdev. The documentation of -drive mentions that all
-blockdev options are accepted as well.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
8649f2f9b2 migration: use bdrv_drain_all_begin/end() instead bdrv_drain_all()
blk/bdrv_drain_all() only takes effect for a single instant and then
resumes block jobs, guest devices, and other external clients like the
NBD server.  This can be handy when performing a synchronous drain
before terminating the program, for example.

Monitor commands usually need to quiesce I/O across an entire code
region so blk/bdrv_drain_all() is not suitable.  They must use
bdrv_drain_all_begin/end() to mark the region.  This prevents new I/O
requests from slipping in or worse - block jobs completing and modifying
the graph.

I audited other blk/bdrv_drain_all() callers but did not find anything
that needs a similar fix.  This patch fixes the savevm/loadvm commands.
Although I haven't encountered a real world issue this makes the code
safer.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
17e2a4a47d migration: avoid recursive AioContext locking in save_vmstate()
AioContext was designed to allow nested acquire/release calls.  It uses
a recursive mutex so callers don't need to worry about nesting...or so
we thought.

BDRV_POLL_WHILE() is used to wait for block I/O requests.  It releases
the AioContext temporarily around aio_poll().  This gives IOThreads a
chance to acquire the AioContext to process I/O completions.

It turns out that recursive locking and BDRV_POLL_WHILE() don't mix.
BDRV_POLL_WHILE() only releases the AioContext once, so the IOThread
will not be able to acquire the AioContext if it was acquired
multiple times.

Instead of trying to release AioContext n times in BDRV_POLL_WHILE(),
this patch simply avoids nested locking in save_vmstate().  It's the
simplest fix and we should step back to consider the big picture with
all the recent changes to block layer threading.

This patch is the final fix to solve 'savevm' hanging with -object
iothread.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
ea17c9d20d block: use BDRV_POLL_WHILE() in bdrv_rw_vmstate()
Calling aio_poll() directly may have been fine previously, but this is
the future, man!  The difference between an aio_poll() loop and
BDRV_POLL_WHILE() is that BDRV_POLL_WHILE() releases the AioContext
around aio_poll().

This allows the IOThread to run fd handlers or BHs to complete the
request.  Failure to release the AioContext causes deadlocks.

Using BDRV_POLL_WHILE() partially fixes a 'savevm' hang with -object
iothread.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Stefan Hajnoczi
dc88a467ec block: count bdrv_co_rw_vmstate() requests
Call bdrv_inc/dec_in_flight() for vmstate reads/writes.  This seems
unnecessary at first glance because vmstate reads/writes are done
synchronously while the guest is stopped.  But we need the bdrv_wakeup()
in bdrv_dec_in_flight() so the main loop sees request completion.
Besides, it's cleaner to count vmstate reads/writes like ordinary
read/write requests.

The bdrv_wakeup() partially fixes a 'savevm' hang with -object iothread.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-26 14:51:13 +02:00
Kevin Wolf
24575bfa8c qemu-iotests: Test exiting qemu with running job
When qemu is exited, all running jobs should be cancelled successfully.
This adds a test for this for all types of block jobs that currently
exist in qemu.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-06-26 14:51:13 +02:00
Kevin Wolf
ecaf8c8a6f qemu-iotests: Allow starting new qemu after cleanup
After _cleanup_qemu(), test cases should be able to start the next qemu
process and call _cleanup_qemu() for that one as well. For this to work
cleanly, we need to improve the cleanup so that the second invocation
doesn't try to kill the qemu instances from the first invocation a
second time (which would result in error messages).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:51:12 +02:00
Kevin Wolf
4f78a16fee commit: Fix completion with extra reference
commit_complete() can't assume that after its block_job_completed() the
job is actually immediately freed; someone else may still be holding
references. In this case, the op blockers on the intermediate nodes make
the graph reconfiguration in the completion code fail.

Call block_job_remove_all_bdrv() manually so that we know for sure that
any blockers on intermediate nodes are given up.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-06-26 14:51:12 +02:00
Peter Maydell
b01a4fd3bd configure: Define NCURSES_WIDECHAR if we're using curses
We want the wide character functions from the ncurses header.
Unfortunately it doesn't provide them by default, but only
if either:
 * NCURSES_WIDECHAR is defined (for ncurses 20111030 and up)
 * _XOPEN_SOURCE/_XOPEN_SOURCE_EXTENDED are suitably defined

So far we have been implicitly relying on the latter, because
for GNU libc when we define _GNU_SOURCE this causes libc
to define the _XOPEN_SOURCE macros for us. Unfortunately
this doesn't work on all libcs, because some (like OSX and
musl libc) do not define _XOPEN_SOURCE when _GNU_SOURCE
is defined.

We can't fix this by defining _XOPEN_SOURCE ourselves, because
that also means "and don't provide any functions that aren't in
that standard", and not all libcs provide any way to override
that to also get the non-standard functions. In particular
FreeBSD has no such mechanism, and OSX's _DARWIN_C_SOURCE
doesn't reenable everything (for instance getpagesize()
is still not prototyped if _DARWIN_C_SOURCE and _XOPEN_SOURCE
are both defined).

So we have to define NCURSES_WIDECHAR. (This will only work
if your ncurses is at least 20111030, as older versions
don't honour this macro.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1496414138-7622-1-git-send-email-peter.maydell@linaro.org
2017-06-26 13:21:16 +01:00
Peter Maydell
931892e8a6 Merge remote-tracking branch 'remotes/rth/tags/pull-s390-20170623' into staging
Queued target/s390x patches

# gpg: Signature made Fri 23 Jun 2017 17:18:24 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-s390-20170623:
  target/s390x: Implement idte instruction
  target/s390x: Improve heuristic for ipte
  target/s390x: Indicate and check for local tlb clearing
  target/s390x: Clean up TB flag bits
  target/s390x: Finish implementing ETF2-ENH
  target/s390x: Mark STFLE_49 facility as available
  target/s390x: Implement processor-assist insn
  target/s390x: Implement execution-hint insns
  target/s390x: Mark STFLE_53 facility as available
  target/s390x: Implement load-and-zero-rightmost-byte insns
  target/s390x: Implement load-on-condition-2 insns
  target/s390x: Mark FPSEH facility as available
  target/s390x: implement mvcos instruction
  target/s390x: change PSW_SHIFT_KEY
  target/s390x: Map existing FAC_* names to S390_FEAT_* names

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-23 18:11:48 +01:00
David Hildenbrand
be7f28de5d target/s390x: Implement idte instruction
Let's keep it very simple for now and flush the complete tlb,
we currently can't find the right entries in our tlb, we would have
to store the used tables for each element.

As we now fully implement the DAT-enhancement facility, we can allow to
enable it for the qemu CPU model.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170622094151.28633-4-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:45 -07:00
David Hildenbrand
97b95aae3b target/s390x: Improve heuristic for ipte
If only the page index is set, most likely we don't have a valid
virtual address. Let's do a full tlb flush for that case.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170622094151.28633-3-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:45 -07:00
David Hildenbrand
faf1c63d34 target/s390x: Indicate and check for local tlb clearing
Let's allow to enable it for the qemu cpu model and correctly emulate
it.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170622094151.28633-2-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:45 -07:00
Richard Henderson
159fed45db target/s390x: Clean up TB flag bits
Most of the PSW bits that were being copied into TB->flags
are not relevant to translation.  Removing those that are
unnecessary reduces the amount of translation required.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:45 -07:00
Richard Henderson
3c39c800bf target/s390x: Finish implementing ETF2-ENH
Missed the proper alignment in TRTO/TRTT, and ignoring the M3
field for all TRXX insns without ETF2-ENH.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
Richard Henderson
afa26f3bae target/s390x: Mark STFLE_49 facility as available
This facility bit includes execution-hint, load-and-trap,
miscellaneous-instruction-extensions and processor-assist.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
Richard Henderson
632c61a9b8 target/s390x: Implement processor-assist insn
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
Richard Henderson
6a68acd5b7 target/s390x: Implement execution-hint insns
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
Richard Henderson
37b8638d43 target/s390x: Mark STFLE_53 facility as available
This facility bit includes load-on-condition-2 and
load-and-zero-rightmost-byte.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
Richard Henderson
c2a5c1d718 target/s390x: Implement load-and-zero-rightmost-byte insns
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
Richard Henderson
45aa9aa3b7 target/s390x: Implement load-on-condition-2 insns
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
Richard Henderson
e1a5d922b4 target/s390x: Mark FPSEH facility as available
This facility bit includes DFP-rounding, FPR-GR-transfer,
FPS-sign-handling, and IEEE-exception-simulation.  We do
support all of these.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 09:17:44 -07:00
David Hildenbrand
3e7e5e0bc1 target/s390x: implement mvcos instruction
This adds support for the MOVE WITH OPTIONAL SPECIFICATIONS (MVCOS)
instruction. Allow to enable it for the qemu cpu model using

qemu-system-s390x ... -cpu qemu,mvcos=on ...

This allows booting a Linux kernel that uses it for uaccess.

We are missing (as for most other parts) low address protection checks,
PSW key / storage key checks and support for AR-mode.

We fake an ADDRESSING exception when called from problem state (which
seems to rely on PSW key checks to be in place) and if AR-mode is used.
User mode will always see a PRIVILEGED exception.

This patch is based on an original patch by Miroslav Benes (thanks!).

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170614133819.18480-3-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 08:40:46 -07:00
David Hildenbrand
c8bd95377b target/s390x: change PSW_SHIFT_KEY
Such shifts are usually used to easily extract the PSW KEY from the PSW
mask, so let's avoid the confusing offset of 4.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170614133819.18480-2-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 08:40:40 -07:00
Richard Henderson
d20bd43c4c target/s390x: Map existing FAC_* names to S390_FEAT_* names
The FAC_ names were placeholders prior to the introduction
of the current facility modeling.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-23 08:40:28 -07:00
Peter Maydell
14a7fe1a26 Merge remote-tracking branch 'remotes/otubo/tags/pull-seccomp-20170622' into staging
pull-seccomp-20170622

# gpg: Signature made Thu 22 Jun 2017 09:01:01 BST
# gpg:                using RSA key 0xDF32E7C0F0FFF9A2
# gpg: Good signature from "Eduardo Otubo (Senior Software Engineer) <otubo@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: D67E 1B50 9374 86B4 0723  DBAB DF32 E7C0 F0FF F9A2

* remotes/otubo/tags/pull-seccomp-20170622:
  MAINTAINERS: seccomp: change email contact for Eduardo Otubo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-23 16:19:04 +01:00
Peter Maydell
e499ff0707 Merge remote-tracking branch 'remotes/kraxel/tags/queue/misc-pull-request' into staging
# gpg: Signature made Fri 23 Jun 2017 13:48:04 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/queue/misc-pull-request:
  applesmc: fix port i/o access width
  applesmc: implement error status port
  applesmc: cosmetic whitespace and indentation cleanup

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-23 15:40:09 +01:00
John Arbuckle
e47ec1a9ba ui/cocoa.m: add Speed menu
Programs running inside of QEMU can sometimes use more CPU time than is really
needed. To solve this problem, we just need to throttle the virtual CPU. This
feature will stop laptops from burning up.

This patch adds a menu called Speed that has menu items from 100% to 1% that
represent the speed options. 100% is full speed and 1% is slowest.

Signed-off-by: John Arbuckle <programmingkidx@gmail.com>
Message-id: D6FAAABF-064D-49C0-B572-C73679F34052@gmail.com
[PMM: Moved "mark 100% menu item as checked initially" code to
 after menu item is allocated, not before it]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-23 15:02:40 +01:00
Gabriel L. Somlo
1b8274d4f9 applesmc: fix port i/o access width
Set access width of all AppleSMC i/o regions to 1 byte, since they
all represent 8-bit-wide ports.

Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Message-id: 1497639316-22202-4-git-send-email-gsomlo@gmail.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-23 13:35:01 +02:00
Gabriel L. Somlo
9e507d7e77 applesmc: implement error status port
As of release 10.12.4, OS X (Sierra) refuses to boot unless the
AppleSMC supports an additional I/O port, expected to provide an
error status code.

Update the [cmd|data]_write() and data_read() methods to implement
the required state machine, and add I/O region & methods to handle
access to the error port.

Originally proposed by Eric Shelton <eshelton@pobox.com> based in
part on FakeSMC (git://git.assembla.com/fakesmc.git).

Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Phil Dennis-Jordan <phil@philjordan.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1497639316-22202-3-git-send-email-gsomlo@gmail.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-23 13:35:00 +02:00
Gabriel L. Somlo
36bcd0350c applesmc: cosmetic whitespace and indentation cleanup
Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Phil Dennis-Jordan <phil@philjordan.eu>
Message-id: 1497639316-22202-2-git-send-email-gsomlo@gmail.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-23 13:35:00 +02:00
Peter Maydell
40b06f5230 Merge remote-tracking branch 'remotes/kraxel/tags/ui-and-input-20170623-pull-request' into staging
# gpg: Signature made Fri 23 Jun 2017 11:39:22 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/ui-and-input-20170623-pull-request:
  ps2: reset queue in ps2_reset_keyboard
  ps2: add ps2_reset_queue
  ps2: add and use PS2State typedef
  sdl2: add assert to make coverity happy
  hid: Reset kbd modifiers on reset
  input: Decrement queue count on kbd delay
  keymaps: add tracing

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-23 12:00:21 +01:00
Gerd Hoffmann
6e24ee0c1e ps2: reset queue in ps2_reset_keyboard
When the guest resets the keyboard also clear the queue.  It is highly
unlikely that the guest is still interested in the events stuck in the
queue, and it avoids confusing the guest in case the queue is full and
the ACK can't be queued up.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1372583
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170606112105.13331-4-kraxel@redhat.com
2017-06-23 11:51:50 +02:00
Gerd Hoffmann
954ee55bd5 ps2: add ps2_reset_queue
Factor out ps2 queue reset to a separate function.
No functional change.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170606112105.13331-3-kraxel@redhat.com
2017-06-23 11:51:50 +02:00
Gerd Hoffmann
8498bb8d2e ps2: add and use PS2State typedef
Cleanup: Create and use a typedef for PS2State and stop passing void
pointers.  No functional change.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170606112105.13331-2-kraxel@redhat.com
2017-06-23 11:51:50 +02:00
Gerd Hoffmann
85970a627f sdl2: add assert to make coverity happy
There is a loop a few lines up counting consoles and setting
sdl2_num_outputs accordingly, so con ptr can't be NULL there.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170621122234.12751-1-kraxel@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-23 11:50:46 +02:00
Alexander Graf
51dbea77a2 hid: Reset kbd modifiers on reset
When resetting the keyboard, we need to reset not just the pending keystrokes,
but also any pending modifiers. Otherwise there's a race when we're getting
reset while running an escape sequence (modifier 0x100).

Cc: qemu-stable@nongnu.org
Signed-off-by: Alexander Graf <agraf@suse.de>
Message-id: 1498117295-162030-1-git-send-email-agraf@suse.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-23 11:50:05 +02:00
Alexander Graf
77b0359bf4 input: Decrement queue count on kbd delay
Delays in the input layer are special cased input events. Every input
event is accounted for in a global input queue count. The special cased
delays however did not get removed from the queue, leading to queue overruns
and thus silent key drops after typing quite a few characters.

Signed-off-by: Alexander Graf <agraf@suse.de>
Message-id: 1498117318-162102-1-git-send-email-agraf@suse.de
Fixes: be1a7176 ("input: add support for kbd delays")
Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-23 11:49:44 +02:00
Gerd Hoffmann
d3b787fa7d keymaps: add tracing
Drop commented debug logging, add trace points instead.

Also cleanup parser code a bit, the key name is copied into a new
variable instead of patching the input line, that way we can log
the unmodified line.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170606134736.26080-1-kraxel@redhat.com
2017-06-23 11:47:59 +02:00
Peter Maydell
4c8c1cc544 Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.10-pull-request' into staging
# gpg: Signature made Wed 21 Jun 2017 22:00:24 BST
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier/tags/m68k-for-2.10-pull-request:
  target-m68k: add FPCR and FPSR
  target-m68k: define 96bit FP registers for gdb on 680x0
  target-m68k: use floatx80 internally
  target-m68k: initialize FPU registers
  target-m68k: move fmove CR to a function

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-22 19:01:58 +01:00
Peter Maydell
e18a639164 Merge remote-tracking branch 'remotes/kraxel/tags/usb-20170621-pull-request' into staging
# gpg: Signature made Wed 21 Jun 2017 16:43:14 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/usb-20170621-pull-request:
  usb-host: support devices with sparse/non-sequential USB interfaces

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-22 15:09:48 +01:00
Peter Maydell
469819a3e8 Merge remote-tracking branch 'remotes/stsquad/tags/pull-ci-updates-210617-2' into staging
This is mostly Philippe's updates

We add the following cross-compile targets:
  - mipsel-softmmu,mipsel-linux-user,mips64el-linux-user
  - armeb-linux-user

While I was rolling I discovered we could also back out a bunch of the
emdebian hacks as the newly released stretch handles cross compilers
as first class citizens. Unfortunately this also meant I had to drop
the powerpc support as that is no longer in Debian stable.

# gpg: Signature made Wed 21 Jun 2017 15:09:50 BST
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-ci-updates-210617-2: (21 commits)
  MAINTAINERS: self-appoint me as reviewer in build/test automation
  MAINTAINERS: add Shippable automation platform URL
  shippable: add mipsel target
  shippable: add armeb-linux-user target
  shippable: be verbose while building docker images
  shippable: do not initialize submodules automatically
  shippable: build using all available cpus
  shippable: use C locale to simplify console output
  docker: add mipsel build target
  docker: add extra libs to s390x target to extend codebase coverage
  docker: add extra libs to arm64 target to extend codebase coverage
  docker: add extra libs to armhf target to extend codebase coverage
  docker: use eatmydata in debian arm64 image
  docker: use eatmydata in debian armhf image
  docker: use eatmydata, install common build packages in base image
  docker: use better regex to generate deb-src entries
  docker: install ca-certificates package in base image
  docker: rebuild image if 'extra files' checksum does not match
  docker: add --include-files argument to 'build' command
  docker: let _copy_with_mkdir() sub_path argument be optional
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-22 14:33:42 +01:00
Peter Maydell
22a9e1fd63 Merge remote-tracking branch 'remotes/kraxel/tags/queue/ui-pull-request' into staging
# gpg: Signature made Wed 21 Jun 2017 14:23:31 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/queue/ui-pull-request:
  ui: Remove inclusion of "hw/qdev.h"
  console: remove do_safe_dpy_refresh
  gtk: use framebuffer helper functions.
  sdl2: use framebuffer helper functions.
  egl-headless: use framebuffer helper functions.
  egl-helpers: add helpers to handle opengl framebuffers

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-22 13:18:11 +01:00
Peter Maydell
84e3d0725b Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-06-09-v2' into staging
QAPI patches for 2017-06-09

# gpg: Signature made Tue 20 Jun 2017 13:31:39 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-06-09-v2: (41 commits)
  tests/qdict: check more get_try_int() cases
  console: use get_uint() for "head" property
  i386/cpu: use get_uint() for "min-level"/"min-xlevel" properties
  numa: use get_uint() for "size" property
  pnv-core: use get_uint() for "core-pir" property
  pvpanic: use get_uint() for "ioport" property
  auxbus: use get_uint() for "addr" property
  arm: use get_uint() for "mp-affinity" property
  xen: use get_uint() for "max-ram-below-4g" property
  pc: use get_uint() for "hpet-intcap" property
  pc: use get_uint() for "apic-id" property
  pc: use get_uint() for "iobase" property
  acpi: use get_uint() for "pci-hole*" properties
  acpi: use get_uint() for various acpi properties
  acpi: use get_uint() for "acpi-pcihp-io*" properties
  platform-bus: use get_uint() for "addr" property
  bcm2835_fb: use {get, set}_uint() for "vcram-size" and "vcram-base"
  aspeed: use {set, get}_uint() for "ram-size" property
  pcihp: use get_uint() for "bsel" property
  pc-dimm: make "size" property uint64
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-22 11:34:39 +01:00
Peter Maydell
db7a99cdc1 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170619' into staging
Queued TCG patches

# gpg: Signature made Mon 19 Jun 2017 19:12:06 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170619:
  target/arm: Exit after clearing aarch64 interrupt mask
  target/s390x: Exit after changing PSW mask
  target/alpha: Use tcg_gen_lookup_and_goto_ptr
  tcg: Increase hit rate of lookup_tb_ptr
  tcg/arm: Use ldr (literal) for goto_tb
  tcg/arm: Try pc-relative addresses for movi
  tcg/arm: Remove limit on code buffer size
  tcg/arm: Use indirect branch for goto_tb
  tcg/aarch64: Use ADR in tcg_out_movi
  translate-all: consolidate tb init in tb_gen_code
  tcg: allocate TB structs before the corresponding translated code
  util: add cacheinfo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-22 10:25:03 +01:00
otubo
064983cb0a MAINTAINERS: seccomp: change email contact for Eduardo Otubo
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
2017-06-22 09:58:00 +02:00
Laurent Vivier
ba62494483 target-m68k: add FPCR and FPSR
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170620205121.26515-6-laurent@vivier.eu>
2017-06-21 22:11:55 +02:00
Laurent Vivier
5a4526b26a target-m68k: define 96bit FP registers for gdb on 680x0
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170620205121.26515-5-laurent@vivier.eu>
2017-06-21 22:11:12 +02:00
Laurent Vivier
f83311e476 target-m68k: use floatx80 internally
Coldfire uses float64, but 680x0 uses floatx80.
This patch introduces the use of floatx80 internally
and enables 680x0 80bits FPU.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170620205121.26515-4-laurent@vivier.eu>
2017-06-21 22:10:29 +02:00
Laurent Vivier
f4a6ce5155 target-m68k: initialize FPU registers
on reset, set FP registers to NaN and control registers to 0

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170620205121.26515-3-laurent@vivier.eu>
2017-06-21 22:09:45 +02:00
Laurent Vivier
860b9ac779 target-m68k: move fmove CR to a function
Move code of fmove to/from control register to a function

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170620205121.26515-2-laurent@vivier.eu>
2017-06-21 21:57:39 +02:00
Philippe Mathieu-Daudé
32b9ca9868 MAINTAINERS: self-appoint me as reviewer in build/test automation
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:06 +01:00
Philippe Mathieu-Daudé
2a747008cb MAINTAINERS: add Shippable automation platform URL
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:06 +01:00
Philippe Mathieu-Daudé
92bd1e465b shippable: add mipsel target
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
[AJB: fixups after dropping powerpc]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:06 +01:00
Philippe Mathieu-Daudé
492734b5da shippable: add armeb-linux-user target
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:06 +01:00
Philippe Mathieu-Daudé
d2a44865e8 shippable: be verbose while building docker images
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:06 +01:00
Philippe Mathieu-Daudé
a825ca0613 shippable: do not initialize submodules automatically
instead do it in the 'ci' target when needed.

for mips64el-softmmu target:
use dtc submodule if distrib packages are too old.

example with outdated libfdt on mips64el-softmmu target (required is >= 1.4.2):
 # dpkg-query --showformat='${Version}\n' --show libfdt-dev
 1.4.0+dfsg-1

shippable output:
----------------
  LINK    mips64el-softmmu/qemu-system-mips64el
../hw/core/loader-fit.o: In function `load_fit':
/root/src/github.com/philmd/qemu/hw/core/loader-fit.c:278: undefined reference to `fdt_first_subnode'
/root/src/github.com/philmd/qemu/hw/core/loader-fit.c:286: undefined reference to `fdt_next_subnode'
/root/src/github.com/philmd/qemu/hw/core/loader-fit.c:277: undefined reference to `fdt_first_subnode'
collect2: error: ld returned 1 exit status
Makefile:201: recipe for target 'qemu-system-mips64el' failed
make[1]: *** [qemu-system-mips64el] Error 1
Makefile:327: recipe for target 'subdir-mips64el-softmmu' failed
make: *** [subdir-mips64el-softmmu] Error 2

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:06 +01:00
Philippe Mathieu-Daudé
a08fc2f8cc shippable: build using all available cpus
As of this commit:

$ echo "container proc:" `getconf _NPROCESSORS_ONLN` `getconf _NPROCESSORS_CONF`
container proc: 2 2

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:05 +01:00
Philippe Mathieu-Daudé
c34647c18a shippable: use C locale to simplify console output
remove this noise:

perl: warning: Setting locale failed.
perl: warning: Please check that your locale settings:
    LANGUAGE = (unset),
    LC_ALL = "en_US.UTF-8",
    LC_CTYPE = "en_US.UTF-8",
    LANG = "en_US.UTF-8"
    are supported and installed on your system.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:03:05 +01:00
Philippe Mathieu-Daudé
2e1d6bdcce docker: add mipsel build target
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
[AJB: remove apt-fake kludge]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:02:43 +01:00
Philippe Mathieu-Daudé
c9c06eb832 docker: add extra libs to s390x target to extend codebase coverage
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
905bf0ee8a docker: add extra libs to arm64 target to extend codebase coverage
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
32809e7f7b docker: add extra libs to armhf target to extend codebase coverage
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
8a98bfc6e3 docker: use eatmydata in debian arm64 image
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
8a48be0e87 docker: use eatmydata in debian armhf image
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
96e659d006 docker: use eatmydata, install common build packages in base image
The common build packages are: build-essential clang git bison flex

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
[AJB: fixups following stretch update]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
cf80eb8d09 docker: use better regex to generate deb-src entries
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
[AJB: fixed up following dropping emdebian]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
2c1c31ed55 docker: install ca-certificates package in base image
Resolve SSL verification issue at shippable container's git_sync stage:

shippable logs:
--------------
git_sync
- ssh-agent bash -c 'ssh-add /tmp/ssh/01_deploy; git clone https://github.com/philmd/qemu.git /root/src/github.com/philmd/qemu'
Identity added: /tmp/ssh/01_deploy (rsa w/o comment)
Cloning into '/root/src/github.com/philmd/qemu'...
fatal: unable to access 'https://github.com/philmd/qemu.git/': Problem with the SSL CA cert (path? access rights?)
retrying 1 of 3 times...

Suggested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
[AJB: fixed re-base conflict following stretch updates]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
438d116872 docker: rebuild image if 'extra files' checksum does not match
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
4c84f662c2 docker: add --include-files argument to 'build' command
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Philippe Mathieu-Daudé
2499ee9fad docker: let _copy_with_mkdir() sub_path argument be optional
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Alex Bennée
7af25f9f6a docker: update qemu:debian base following stretch release
Debian has now released Stretch as its new stable. As we track
debian:stable-slim this has a few consequences. For one thing we can
now drop the emdebian hacks as cross compilers are part of the
official repositories now. However we do lose the ability to build
against powerpc (not ppc64) since that is no longer a release
architecture.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-06-21 15:01:48 +01:00
Samuel Brian
896b6757f9 usb-host: support devices with sparse/non-sequential USB interfaces
Some USB devices have sparse interface numbering, which currently cannot be
passed through.
For example, the Sierra Wireless MC7455/MC7430:

  # lsusb  -D /dev/bus/usb/003/003 | egrep '1199|9071|bNumInterfaces|bInterfaceNumber'
  Device: ID 1199:9071 Sierra Wireless, Inc.
    idVendor           0x1199 Sierra Wireless, Inc.
    idProduct          0x9071
      bNumInterfaces          5
        bInterfaceNumber        0
        bInterfaceNumber        2
        bInterfaceNumber        3
        bInterfaceNumber        8
        bInterfaceNumber       10

In this case, the interface numbers are 0, 2, 3, 8, 10 and not the
0, 1, 2, 3, 4 that QEMU tries to claim.

This change allows sparse USB interface numbering.
Instead of only claiming the interfaces in the range reported by the USB
device through bNumInterfaces, QEMU attempts to claim all possible
interfaces.

v2 to fix broken v1 patch formatting.
v3 to fix indentation.

Signed-off-by: Samuel Brian <sam.brian@accelerated.com>
Message-id: 20170613234039.27201-1-sam.brian@accelerated.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-21 15:30:08 +02:00
Thomas Huth
95e92000c8 ui: Remove inclusion of "hw/qdev.h"
Looks like #include "hw/qdev.h" is not needed here, so remove it.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1497894617-12143-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-21 14:26:15 +02:00
Gerd Hoffmann
3f8f1313e0 console: remove do_safe_dpy_refresh
Drop the temporary workaround for the broken display updates.
All display adapters are updated, so this should be safe without
causing regressions.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20170614084538.32480-1-kraxel@redhat.com
2017-06-21 14:24:22 +02:00
Gerd Hoffmann
a4f113fd69 gtk: use framebuffer helper functions.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170614084149.31314-5-kraxel@redhat.com
2017-06-21 14:23:16 +02:00
Gerd Hoffmann
371c4ef637 sdl2: use framebuffer helper functions.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170614084149.31314-4-kraxel@redhat.com
2017-06-21 14:23:16 +02:00
Gerd Hoffmann
d8dc67e119 egl-headless: use framebuffer helper functions.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170614084149.31314-3-kraxel@redhat.com
2017-06-21 14:23:16 +02:00
Gerd Hoffmann
6fafc26014 egl-helpers: add helpers to handle opengl framebuffers
Add a collection of egl_fb_*() helper functions to manage and use opengl
framebuffers, which is a common pattern in UI code with opengl support.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170614084149.31314-2-kraxel@redhat.com
2017-06-21 14:23:16 +02:00
Philippe Mathieu-Daudé
8dfaf23ae1 tcg/tci: fix tcg-interpreter build
fix regression from commit 244f144134:

    $ make subdir-arm-softmmu
    make[1]: *** No rule to make target 'tci.o', needed by 'qemu-system-arm'.  Stop.
    Makefile:328: recipe for target 'subdir-arm-softmmu' failed
    make: *** [subdir-arm-softmmu] Error 2

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170620163009.21764-1-f4bug@amsat.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-20 18:39:15 +01:00
Peter Maydell
e85c0d1401 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc: fixes, cleanups, features

Some fixes and cleanups. Extended TSEG sizes.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 16 Jun 2017 16:45:07 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  hw/i386: fix nvdimm check error path
  intel_iommu: cleanup vtd_interrupt_remap_msi()
  intel_iommu: cleanup vtd_{do_}iommu_translate()
  intel_iommu: switching the rest DPRINTF to trace
  tests/q35-test: add TSEG size checks
  tests/q35-test: push down qtest_start / qtest_end to test case(s)
  q35/mch: implement extended TSEG sizes

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-20 17:12:41 +01:00
Peter Maydell
65a0e3e842 Merge remote-tracking branch 'remotes/famz/tags/docker-and-block-pull-request' into staging
# gpg: Signature made Fri 16 Jun 2017 01:18:46 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-and-block-pull-request: (23 commits)
  block: make accounting thread-safe
  block: split BlockAcctStats creation and setup
  block: introduce block_account_one_io
  block: protect modification of dirty bitmaps with a mutex
  migration/block: reset dirty bitmap before reading
  block: introduce dirty_bitmap_mutex
  block: protect tracked_requests and flush_queue with reqs_lock
  block: access write_gen with atomics
  block: use Stat64 for wr_highest_offset
  util: add stats64 module
  throttle-groups: protect throttled requests with a CoMutex
  throttle-groups: do not use qemu_co_enter_next
  throttle-groups: only start one coroutine from drained_begin
  block: access io_plugged with atomic ops
  block: access wakeup with atomic ops
  block: access serialising_in_flight with atomic ops
  block: access io_limits_disabled with atomic ops
  block: access quiesce_counter with atomic ops
  block: access copy_on_read with atomic ops
  docker: Add flex and bison to centos6 image
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-20 16:01:15 +01:00
Peter Maydell
7e56accdaf Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* nbd and qemu-nbd fixes (Eric, Max)
* nbd refactoring (Vladimir)
* vhost-user-scsi, take N+1 (Felipe)
* replace memory_region_set_fd with memory_region_init_ram_from_fd (Marc-André)
* docs/ movement (Paolo)
* megasas TOCTOU fixes (Paolo)
* make async_safe_run_on_cpu work on kvm/hax accelerators (Paolo)
* Build system and poison.h improvements (Thomas)
* -accel thread=xxx fix (Thomas)
* move files to accel/ (Yang Zhong)

# gpg: Signature made Thu 15 Jun 2017 10:51:55 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (41 commits)
  vhost-user-scsi: Introduce a vhost-user-scsi sample application
  vhost-user-scsi: Introduce vhost-user-scsi host device
  qemu-doc: include version number
  docs: create interop/ subdirectory
  include/exec/poison: Mark some CONFIG defines as poisoned, too
  include/exec/poison: Add missing TARGET defines
  nbd/server: refactor nbd_trip
  nbd/server: rename rc to ret
  nbd/server: get rid of fail: return rc
  nbd/server: nbd_negotiate: fix error path
  nbd/server: remove NBDClientNewData
  nbd/server: refactor nbd_co_receive_request
  nbd/server: get rid of EAGAIN dead code
  nbd/server: refactor nbd_co_send_reply
  nbd/server: get rid of ssize_t
  nbd/server: get rid of nbd_negotiate_read and friends
  nbd: make nbd_drop public
  nbd: rename read_sync and friends
  accel: move kvm related accelerator files into accel/
  tcg: move tcg backend files into accel/tcg/
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-20 14:20:34 +01:00
Marc-André Lureau
269c20b2bb tests/qdict: check more get_try_int() cases
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Suggested-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-42-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
ad664c1d4c console: use get_uint() for "head" property
TYPE_QEMU_CONSOLE property "head" is defined with
object_property_add_uint*_ptr().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-41-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
709fa704f6 i386/cpu: use get_uint() for "min-level"/"min-xlevel" properties
These are properties of TYPE_X86_CPU, defined with DEFINE_PROP_UINT32()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-40-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
61d7c14437 numa: use get_uint() for "size" property
"size" is a property of TYPE_MEMORY_BACKEND.
host_memory_backend_get_size() and host_memory_backend_set_size() use
visit_type_size().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-39-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
9848619a3b pnv-core: use get_uint() for "core-pir" property
This is an alias of TYPE_PNV_CORE's property "pir", which is defined
with DEFINE_PROP_UINT32()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-38-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
07fe095452 pvpanic: use get_uint() for "ioport" property
TYPE_ISA_PVPANIC_DEVICE's property PVPANIC_IOPORT_PROP is defined with
DEFINE_PROP_UINT16().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-37-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
1eb4243434 auxbus: use get_uint() for "addr" property
This is TYPE_MEMORY_REGION's property.  Its getter
memory_region_get_addr() uses visit_type_uint64().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-36-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
77a7a36760 arm: use get_uint() for "mp-affinity" property
TYPE_ARM_CPU's property "mp-affinity" is defined with
DEFINE_PROP_UINT64().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-35-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
4ccd89d294 xen: use get_uint() for "max-ram-below-4g" property
TYPE_PC_MACHINE's property PC_MACHINE_MAX_RAM_BELOW_4G's getter and
setter pc_machine_get_max_ram_below_4g() and
pc_machine_set_max_ram_below_4g() use visit_type_size()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-34-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
5d7fb0f254 pc: use get_uint() for "hpet-intcap" property
TYPE_HPET's property HPET_INTCAP is defined with DEFINE_PROP_UINT32().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-33-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
c7b4efb4a0 pc: use get_uint() for "apic-id" property
TYPE_X86_CPU's property "apic-id" is defined with DEFINE_PROP_UINT32().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-32-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
1ea1572adf pc: use get_uint() for "iobase" property
TYPE_ISA_FDC's property "iobase" is defined with DEFINE_PROP_UINT32().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-31-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
605553654f acpi: use get_uint() for "pci-hole*" properties
Those properties use visit_type_uint*()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-30-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
b81bdbf3c7 acpi: use get_uint() for various acpi properties
PIIX4: piix4_pm_add_propeties() defines these with
object_property_add_uint*_ptr().

Q35: ich9_lpc_add_properties() and ich9_pm_add_properties() define them
similarly, except for ACPI_PM_PROP_GPE0_BLK().  That one's getter
ich9_pm_get_gpe0_blk() uses visit_type_uint32().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-29-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:33 +02:00
Marc-André Lureau
35f91e5069 acpi: use get_uint() for "acpi-pcihp-io*" properties
Those are defined with object_property_add_uint16_ptr()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-28-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
6d13643a3a platform-bus: use get_uint() for "addr" property
This is TYPE_MEMORY_REGION's property.  Its getter
memory_region_get_addr() uses visit_type_uint64().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-27-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
c5c6c47ce3 bcm2835_fb: use {get, set}_uint() for "vcram-size" and "vcram-base"
Both properties are defined with DEFINE_PROP_UINT32().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-26-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
19e9cdf040 aspeed: use {set, get}_uint() for "ram-size" property
This property is an alias for device TYPE_ASPEED_SDMC's property
"ram-size", which is defined with DEFINE_PROP_UINT64().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-25-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
c03d83d55a pcihp: use get_uint() for "bsel" property
The property is defined with object_property_add_uint32_ptr()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-24-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
b053ef6106 pc-dimm: make "size" property uint64
This carries the memory_region_size() value without implicit cast.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-23-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
9ed442b8ae pc-dimm: use get_uint() for dimm properties
TYPE_PC_DIMM's property PC_DIMM_ADDR_PROP is defined with
DEFINE_PROP_UINT64().

TYPE_PC_DIMM's property PC_DIMM_NODE_PROP is defined with
DEFINE_PROP_UINT32().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-22-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
822335eb51 isa: use get_uint() for "io-base"
The property is defined with DEFINE_PROP_UINT32().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-21-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
446de8b68a qdev: Use appropriate getter/setters type
Based on the underlying type of the data accessed, use the appropriate
getters/setters:

* AcpiPmInfo members s3_disabled, s4_disabled are bool, member s4_val is
  a uint8_t

* Property ACPI_PCIHP_IO_PROP is defined with
  object_property_add_uint32_ptr()

* Property PCIE_HOST_MCFG_SIZE is implemented with visit_type_uint64()

* PCIDevice property "addr" is backed by PCIDevice member devfn, which
  is an int32_t

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-20-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[More verbose commit message]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
d528227d4c apic-common: make "id" property a uint32
The getter and setter of TYPE_APIC_COMMON property "id" are
apic_common_get_id() and apic_common_set_id().

apic_common_get_id() reads either APICCommonState member uint32_t
initial_apic_id or uint8_t id into an int64_t local variable.  It then
passes this variable to visit_type_int().

apic_common_set_id() uses visit_type_int() to read the value into a
local variable, which it then assigns both to initial_apic_id and id.

While the state backing the property is two unsigned members, 8 and 32
bits wide, the actual visitor is 64 bits signed.

Change getter and setter to use visit_type_uint32().  Then everything's
uint32_t, except for @id.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-19-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
3fb2111fc9 qdev: avoid type casts between signed and unsigned
Modify the unsigned type for various properties to use QNUM_U64, to
avoid type casts.

There are a few empty lines added to improve code reading/style.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-18-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Change to set_default_value_enum() dropped]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
76318657a8 qdev: wrap default property value in an union
Wrap the Property default value (an int64_t) in a union, to prepare
for the next patch adding a uint64_t.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-17-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
85bbd1e7a4 qdev: Rename DEFINE_PROP_DEFAULT() to DEFINE_PROP_SIGNED()
The rename prepares for the patch after next's DEFINE_PROP_UNSIGNED().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-16-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Commit message tweaked]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
1e507bb0fd object: use more specific property type names
Use the actual unsigned integer type name.

The type name change impacts the following externally visible area:

* vl.c's machine_help_func() puts it in help for -machine NAME,help.

* QMP command qom-list exposes it in ObjectPropertyInfo member @type.

* QMP command device-list-properties exposes it in DevicePropertyInfo
  member @type.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-15-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
d015c4ea6f q35: fix get_mmcfg_size to use uint64 visitor
e->size is hwaddr, i.e. uint64_t. We silently truncate.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Suggested-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-14-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
3152779cd6 object: add uint property setter/getter
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-13-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:32 +02:00
Marc-André Lureau
5923f85fb8 qapi: update the qobject visitor to use QNUM_U64
Switch to use QNum/uint where appropriate to remove i64 limitation.

The input visitor will cast i64 input to u64 for compatibility
reasons (existing JSON QMP clients already use negative i64 for large
u64, and expect an implicit cast in QEMU).

Note: before the patch, uint64_t values above INT64_MAX are sent over
json QMP as negative values, e.g. UINT64_MAX is sent as -1. After the
patch, they are sent unmodified.  Clearly a bug fix, but we have to
consider compatibility issues anyway.  libvirt should cope fine,
because its parsing of unsigned integers accepts negative values
modulo 2^64.  There's hope that other clients will, too.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-12-marcandre.lureau@redhat.com>
[check_native_list() tweaked for consistency with signed case]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:31 +02:00
Marc-André Lureau
2bc7cfea09 json: learn to parse uint64 numbers
Switch strtoll() usage to qemu_strtoi64() helper while at it.

Add a few tests for large numbers.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-11-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:31 +02:00
Marc-André Lureau
61a8f418b2 qnum: add uint type
In order to store integer values between INT64_MAX and UINT64_MAX, add
a uint64_t internal representation.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-10-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:31 +02:00
Marc-André Lureau
36aeb6094f tests: remove /{qnum, qlist, dict}/destroy test
The tests aren't really useful, or already covered by other simple tests.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-9-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:31 +02:00
Marc-André Lureau
60390d2dc8 qapi: Remove visit_start_alternate() parameter promote_int
Before the previous commit, parameter promote_int = true made
visit_start_alternate() with an input visitor avoid QTYPE_QINT
variants and create QTYPE_QFLOAT variants instead.  This was used
where QTYPE_QINT variants were invalid.

The previous commit fused QTYPE_QINT with QTYPE_QFLOAT, rendering
promote_int useless and unused.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-8-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:31 +02:00
Marc-André Lureau
01b2ffcedd qapi: merge QInt and QFloat in QNum
We would like to use the same QObject type to represent numbers, whether
they are int, uint, or floats. Getters will allow some compatibility
between the various types if the number fits other representations.

Add a few more tests while at it.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-7-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[parse_stats_intervals() simplified a bit, comment in
test_visitor_in_int_overflow() tidied up, suppress bogus warnings]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-20 14:31:31 +02:00
Peter Maydell
5135a1056d Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.10-pull-request' into staging
# gpg: Signature made Thu 15 Jun 2017 09:16:31 BST
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier/tags/m68k-for-2.10-pull-request:
  target-m68k: define ext_opsize
  target-m68k: move FPU helpers to fpu_helper.c
  softfloat: define 680x0 specific values
  target/m68k: fix V flag for CC_OP_SUBx

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-20 11:14:16 +01:00
Peter Maydell
5837aaac25 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Wed 14 Jun 2017 22:54:41 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  block/iscsi: enable filename option and parsing
  block/rbd: enable filename option and parsing

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-20 09:07:43 +01:00
Richard Henderson
8da54b2507 target/arm: Exit after clearing aarch64 interrupt mask
Exit to cpu loop so we reevaluate cpu_arm_hw_interrupts.

Tested-by: Emilio G. Cota <cota@braap.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:11:26 -07:00
Richard Henderson
542f70c22e target/s390x: Exit after changing PSW mask
Exit to cpu loop so we reevaluate cpu_s390x_hw_interrupts.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:11:25 -07:00
Richard Henderson
54e1d4ed1d target/alpha: Use tcg_gen_lookup_and_goto_ptr
Tested-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:11:25 -07:00
Richard Henderson
b97a879de9 tcg: Increase hit rate of lookup_tb_ptr
We can call tb_htable_lookup even when the tb_jmp_cache is completely
empty.  Therefore, un-nest most of the code dependent on tb != NULL
from the read from the cache.

This improves the hit rate of lookup_tb_ptr; for instance, when booting
and immediately shutting down debian-arm, the hit rate improves from
93.2% to 99.4%.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:11:25 -07:00
Richard Henderson
308714e6bc tcg/arm: Use ldr (literal) for goto_tb
The new placement of the TB means that we can use one insn
to load the goto_tb destination directly from the TB.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Richard Henderson
9c39b94f14 tcg/arm: Try pc-relative addresses for movi
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Richard Henderson
acb0b292b6 tcg/arm: Remove limit on code buffer size
Since we're no longer using a direct branch, we have no
limit on the branch distance.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Richard Henderson
3fb53fb4d1 tcg/arm: Use indirect branch for goto_tb
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Richard Henderson
cc74d332ff tcg/aarch64: Use ADR in tcg_out_movi
The new placement of the TB means that we can use one insn
to load the return value for exit_tb returning the TB pointer.

Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Emilio G. Cota
2b48e10f88 translate-all: consolidate tb init in tb_gen_code
We are partially initializing tb in tb_alloc. Instead, fully
initialize it in tb_gen_code, which is tb_alloc's only caller.

This saves an unnecessary write to tb->cflags.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1497038122-26364-1-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Emilio G. Cota
6e3b2bfd6a tcg: allocate TB structs before the corresponding translated code
Allocating an arbitrarily-sized array of tbs results in either
(a) a lot of memory wasted or (b) unnecessary flushes of the code
cache when we run out of TB structs in the array.

An obvious solution would be to just malloc a TB struct when needed,
and keep the TB array as an array of pointers (recall that tb_find_pc()
needs the TB array to run in O(log n)).

Perhaps a better solution, which is implemented in this patch, is to
allocate TB's right before the translated code they describe. This
results in some memory waste due to padding to have code and TBs in
separate cache lines--for instance, I measured 4.7% of padding in the
used portion of code_gen_buffer when booting aarch64 Linux on a
host with 64-byte cache lines. However, it can allow for optimizations
in some host architectures, since TCG backends could safely assume that
the TB and the corresponding translated code are very close to each
other in memory. See this message by rth for a detailed explanation:

  https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg05172.html
  Subject: Re: GSoC 2017 Proposal: TCG performance enhancements
  Message-ID: <1e67644b-4b30-887e-d329-1848e94c9484@twiddle.net>

Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1496790745-314-3-git-send-email-cota@braap.org>
[rth: Simplify the arithmetic in tcg_tb_alloc]
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Emilio G. Cota
b255b2c8a5 util: add cacheinfo
Add helpers to gather cache info from the host at init-time.

For now, only export the host's I/D cache line sizes, which we
will use to improve cache locality to avoid false sharing.

Suggested-by: Richard Henderson <rth@twiddle.net>
Suggested-by: Geert Martin Ijewski <gm.ijewski@web.de>
Tested-by:    Geert Martin Ijewski <gm.ijewski@web.de>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1496794624-4083-1-git-send-email-cota@braap.org>
[rth: Move all implementations from tcg/ppc/]
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-19 11:10:59 -07:00
Peter Maydell
30ff7d1d0b Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170614-1' into staging
ui: prefer gtk3 and sdl2, various fixes.

# gpg: Signature made Wed 14 Jun 2017 08:54:22 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170614-1:
  spice: don't enter opengl mode in case another UI provides opengl support
  sdl: prefer sdl2 over sdl1
  gtk: prefer gtk3 over gtk2
  spice: Use proper enum type for kbd led state
  Improve Cocoa modifier key handling

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-19 18:35:07 +01:00
Peter Maydell
cef8fd6836 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170614-1' into staging
usb: bugfixes for ehci and xhci

# gpg: Signature made Wed 14 Jun 2017 08:44:01 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170614-1:
  ehci: stop recursive calls to ehci_work_bh
  xhci: only update dequeue ptr on completed transfers

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-19 17:54:30 +01:00
Fam Zheng
a1fbe750fd migration: Fix race of image locking between src and dst
Previously, the dst side would immediately try to lock the write byte upon
receiving QEMU_VM_EOF, but at the src side, bdrv_inactivate_all() is only
done after sending it. If the src host is under load, dst may fail to
acquire the lock due to racing with the src unlocking it.

Fix this by hoisting the bdrv_inactivate_all() operation before
QEMU_VM_EOF.

N.B. A further improvement could possibly be done to cleanly hand over
locks between src and dst, so that there is no window where a third QEMU
could steal the locks and prevent src and dst from running.

N.B. This commit includes a minor improvement to the error handling
by using qemu_file_set_error().

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170616160658.32290-1-famz@redhat.com
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
[PMM: noted qemu_file_set_error() use in commit as suggested by Daniel]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-19 17:53:33 +01:00
Marc-André Lureau
58634047b7 qapi: Clean up qobject_input_type_number() control flow
Use the more common pattern to error out.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-6-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Commit message tweaked]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-19 14:56:29 +02:00
Marc-André Lureau
c1214ad3dc tests: add more int/number ranges checks
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170607163635.17635-5-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[test_visitor_in_uint() tightened slightly]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-19 14:56:29 +02:00
Marc-André Lureau
7c877c8030 tests: Remove test cases for alternates of 'number' and 'int'
Alternates with both a 'number' and an 'int' branch will become
invalid when the next patch merges QFloat and QInt into QNum.
More sophisticated alternate code could keep them valid, but since
we have no users outside tests, simply drop the tests.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-4-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-19 14:56:29 +02:00
Marc-André Lureau
560f19f162 object: fix potential leak in getters
If the property is not of the requested type, the getters will leak a
QObject.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-3-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-19 14:56:29 +02:00
Marc-André Lureau
a2740ad584 qdev: remove PropertyInfo.qtype field
Remove dependency on qapi qtype, replace a field by a few PropertyInfo
callbacks to set the default value type (introduced in commit 4f2d3d7).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170607163635.17635-2-marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-19 14:56:29 +02:00
Stefan Hajnoczi
7f3cf2d6e7 hw/i386: fix nvdimm check error path
Commit e987c37aee ("hw/i386: check if
nvdimm is enabled before plugging") introduced a check to reject nvdimm
hotplug if -machine pc,nvdimm=on was not given.

This check executes after pc_dimm_memory_plug() has already completed
and does not reverse the effect of this function in the case of failure.

Perform the check before calling pc_dimm_memory_plug().  This fixes the
following abort:

  $ qemu -M accel=kvm -m 1G,slots=4,maxmem=8G \
         -object memory-backend-file,id=mem1,share=on,mem-path=nvdimm.dat,size=1G
  (qemu) device_add nvdimm,memdev=mem1
  nvdimm is not enabled: missing 'nvdimm' in '-M'
  (qemu) device_add nvdimm,memdev=mem1
  Core dumped

The backtrace is:

  #0  0x00007fffdb5b191f in raise () at /lib64/libc.so.6
  #1  0x00007fffdb5b351a in abort () at /lib64/libc.so.6
  #2  0x00007fffdb5a9da7 in __assert_fail_base () at /lib64/libc.so.6
  #3  0x00007fffdb5a9e52 in  () at /lib64/libc.so.6
  #4  0x000055555577a5fa in qemu_ram_set_idstr (new_block=0x555556747a00, name=<optimized out>, dev=dev@entry=0x555556705590) at qemu/exec.c:1709
  #5  0x0000555555a0fe86 in vmstate_register_ram (mr=mr@entry=0x55555673a0e0, dev=dev@entry=0x555556705590) at migration/savevm.c:2293
  #6  0x0000555555965088 in pc_dimm_memory_plug (dev=dev@entry=0x555556705590, hpms=hpms@entry=0x5555566bb0e0, mr=mr@entry=0x555556705630, align=<optimized out>, errp=errp@entry=0x7fffffffc660)
      at hw/mem/pc-dimm.c:110
  #7  0x000055555581d89b in pc_dimm_plug (errp=0x7fffffffc6c0, dev=0x555556705590, hotplug_dev=<optimized out>) at qemu/hw/i386/pc.c:1713
  #8  0x000055555581d89b in pc_machine_device_plug_cb (hotplug_dev=<optimized out>, dev=0x555556705590, errp=0x7fffffffc6c0) at qemu/hw/i386/pc.c:2004
  #9  0x0000555555914da6 in device_set_realized (obj=<optimized out>, value=<optimized out>, errp=0x7fffffffc7e8) at hw/core/qdev.c:926

Cc: Haozhong Zhang <haozhong.zhang@intel.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-16 18:44:56 +03:00
Peter Xu
e7a3b91fdf intel_iommu: cleanup vtd_interrupt_remap_msi()
Move the memcpy up to where it is needed, then share the trace so that we
trace every correct remapping.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-16 18:44:56 +03:00
Peter Xu
b9313021f3 intel_iommu: cleanup vtd_{do_}iommu_translate()
First, let vtd_do_iommu_translate() return a status, so that we
explicitly know whether an error occurred. Meanwhile, we make sure that
IOMMUTLBEntry is filled in by that function.

Then, cleanup vtd_iommu_translate a bit. So even with PT we'll get a log
now. Also, remove useless assignments.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-16 18:44:55 +03:00
Peter Xu
7feb51b709 intel_iommu: switching the rest DPRINTF to trace
We have converted many of the DPRINTF() into traces. This patch does the
last 100+ ones.

To debug VT-d when an error happens, try enabling:

  -trace enable="vtd_err*"

This should work just like the old GENERAL but of course better, since
we don't need to recompile.

Similar rules apply to the other modules. I was trying to make the
prefix good enough for sub-module debugging.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-16 18:44:55 +03:00
Laszlo Ersek
e691ef6991 tests/q35-test: add TSEG size checks
These checks verify that the guest RAM turns from read-write to
"blackhole" when crossing the low boundary of the TSEG. Both the standard
1MB/2MB/8MB TSEG sizes and an extended (16MB) TSEG size are tested.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-16 18:44:55 +03:00
Laszlo Ersek
8bbf4aa96e tests/q35-test: push down qtest_start / qtest_end to test case(s)
A test program can start up QEMU several times, with different command
lines. For such cases, qtest_start() and qtest_end() are called from
within the individual test functions. Examples: "virtio-console-test.c",
"numa-test.c", and many others.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-16 18:44:55 +03:00
Laszlo Ersek
2f295167e0 q35/mch: implement extended TSEG sizes
The q35 machine type currently lets the guest firmware select a 1MB, 2MB
or 8MB TSEG (basically, SMRAM) size. In edk2/OVMF, we use 8MB, but even
that is not enough when a lot of VCPUs (more than approx. 224) are
configured -- SMRAM footprint scales largely proportionally with VCPU
count.

Introduce a new property for "mch" called "extended-tseg-mbytes", which
expresses (in megabytes) the user's choice of TSEG (SMRAM) size.

Invent a new, QEMU-specific register in the config space of the DRAM
Controller, at offset 0x50, in order to allow guest firmware to query the
TSEG (SMRAM) size.

According to Intel Document Number 316966-002, Table 5-1 "DRAM Controller
Register Address Map (D0:F0)":

    Warning: Address locations that are not listed are considered Intel
             Reserved registers locations. Reads to Reserved registers may
             return non-zero values. Writes to reserved locations may
             cause system failures.

             All registers that are defined in the PCI 2.3 specification,
             but are not necessary or implemented in this component are
             simply not included in this document. The
             reserved/unimplemented space in the PCI configuration header
             space is not documented as such in this summary.

Offsets 0x50 and 0x51 are not listed in Table 5-1. They are also not part
of the standard PCI config space header. And they precede the capability
list as well, which starts at 0xe0 for this device.

When the guest writes value 0xffff to this register, the value that can be
read back is that of "mch.extended-tseg-mbytes" -- unless it remains
0xffff. The guest is required to write 0xffff first (as opposed to a
read-only register) because PCI config space is generally not cleared on
QEMU reset, and after S3 resume or reboot, new guest firmware running on
old QEMU could read a guest OS-injected value from this register.

After reading the available "extended" TSEG size, the guest firmware may
actually request that TSEG size by writing pattern 11b to the ESMRAMC
register's TSEG_SZ bit-field. (The Intel spec referenced above defines
only patterns 00b (1MB), 01b (2MB) and 10b (8MB); 11b is reserved.)

On the QEMU command line, the value can be set with

  -global mch.extended-tseg-mbytes=N

The default value for 2.10+ q35 machine types is 16. The value is limited
to 0xfff (4095) at the moment, purely so that the product (4095 MB) can be
stored to the uint32_t variable "tseg_size" in mch_update_smram(). Users
are responsible for choosing sensible TSEG sizes.

On 2.9 and earlier q35 machine types, the default value is 0. This lets
the 11b bit pattern in ESMRAMC.TSEG_SZ, and the register at offset 0x50,
keep their original behavior.

When "extended-tseg-mbytes" is nonzero, the new register at offset 0x50 is
set to that value on reset, for completeness.

PCI config space is migrated automatically, so no VMSD changes are
necessary.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1447027
Ref: https://lists.01.org/pipermail/edk2-devel/2017-May/010456.html
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-16 18:07:08 +03:00
Paolo Bonzini
5b50bf77ce block: make accounting thread-safe
I'm not trying too hard yet.  Later, with multiqueue support,
this may cause mutex contention or cacheline bouncing.

Cc: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-20-pbonzini@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
9caa6f3dbe block: split BlockAcctStats creation and setup
block_acct_destroy is called unconditionally in blk_delete, but there is
no BlockAcctStats function that is called unconditionally in blk_new.
Split block_acct_init in two, so that it will be possible to create a
QemuMutex in block_acct_init and destroy it in block_acct_cleanup.

Cc: Alberto Garcia <berto@igalia.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-19-pbonzini@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
39c1b4254e block: introduce block_account_one_io
This is the common code to account operations that produced actual I/O.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-18-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
b64bd51efa block: protect modification of dirty bitmaps with a mutex
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-17-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
c0bad49946 migration/block: reset dirty bitmap before reading
Any data that is returned by read may be stale already, the bitmap
has to be cleared before issuing the read.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-16-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
2119882c7e block: introduce dirty_bitmap_mutex
It protects only the list of dirty bitmaps; in the next patch we will
also protect their content.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-15-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
3783fa3dd3 block: protect tracked_requests and flush_queue with reqs_lock
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-14-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
47fec59941 block: access write_gen with atomics
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-13-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
f7946da274 block: use Stat64 for wr_highest_offset
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-12-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
ae2d489c34 util: add stats64 module
This module provides fast paths for 64-bit atomic operations on machines
that only have 32-bit atomic access.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170605123908.18777-11-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
93001e9d87 throttle-groups: protect throttled requests with a CoMutex
Another possibility is to use tg->lock, which we're holding anyway in
both schedule_next_request and throttle_group_co_io_limits_intercept.
This would require open-coding the CoQueue however, so I've chosen this
alternative.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-10-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
3b170dc867 throttle-groups: do not use qemu_co_enter_next
Prepare for removing this function; always restart throttled requests
from coroutine context.  This will matter when restarting throttled
requests will have to acquire a CoMutex.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-9-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
7258ed930c throttle-groups: only start one coroutine from drained_begin
Starting all waiting coroutines from bdrv_drain_all is unnecessary;
throttle_group_co_io_limits_intercept calls schedule_next_request as
soon as the coroutine restarts, which in turn will restart the next
request if possible.

If we only start the first request and let the coroutines dance from
there the code is simpler and there is more reuse between
throttle_group_config, throttle_group_restart_blk and timer_cb.  The
next patch will benefit from this.

We also stop accessing from throttle_group_restart_blk the
blkp->throttled_reqs CoQueues even when there was no
attached throttling group.  This worked but is not pretty.

The only thing that can interrupt the dance is the QEMU_CLOCK_VIRTUAL
timer when switching from one block device to the next, because the
timer is set to "now + 1" but QEMU_CLOCK_VIRTUAL might not be running.
Set that timer to point in the present ("now") rather than the future
and things work.

Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-8-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
850d54a2a9 block: access io_plugged with atomic ops
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-7-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
e2a6ae7fe5 block: access wakeup with atomic ops
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-6-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
20fc71b25c block: access serialising_in_flight with atomic ops
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-5-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
d993b85804 block: access io_limits_disabled with atomic ops
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-4-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
414c2ec358 block: access quiesce_counter with atomic ops
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-3-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Paolo Bonzini
d3faa13e5f block: access copy_on_read with atomic ops
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170605123908.18777-2-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Fam Zheng
79f24568e5 docker: Add flex and bison to centos6 image
Currently there are warnings about flex and bison being missing when
building in the centos6 image:

    make[1]: flex: Command not found
             BISON dtc-parser.tab.c
    make[1]: bison: Command not found

Add them.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170524005206.31916-1-famz@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Fam Zheng
80c58a5b1b docker: Add libaio to fedora image
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170505032340.26467-5-famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Fam Zheng
73a27bbb69 docker: Add bzip2 and hostname to fedora image
It is used by qemu-iotests.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170505032340.26467-3-famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Fam Zheng
f9f65a4af0 docker: Run tests with current user
We've used --add-current-user to create a user in the image, use it to
run tests, because root has too much privilege, and can surprise test
cases.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170505032340.26467-2-famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-06-16 07:55:00 +08:00
Peter Maydell
edf8bc9842 Merge remote-tracking branch 'remotes/rth/tags/pull-s390-20170613' into staging
Queued s390 patches

# gpg: Signature made Tue 13 Jun 2017 21:22:41 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-s390-20170613:
  s390x/cpumodel: wire up cpu type + id for TCG
  target/s390x: rework PGM interrupt psw.addr handling
  target/s390x: correctly indicate PER nullification

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-15 13:45:15 +01:00
Felipe Franciosi
49cc0340f8 vhost-user-scsi: Introduce a vhost-user-scsi sample application
This commit introduces a vhost-user-scsi backend sample application. It
must be linked with libiscsi and libvhost-user.

To use it, compile with:
  $ make vhost-user-scsi

And run as follows:
  $ ./vhost-user-scsi -u vus.sock -i iscsi://uri_to_target/
  $ qemu-system-x86_64 --enable-kvm -m 512 \
      -object memory-backend-file,id=mem,size=512m,share=on,mem-path=guestmem \
      -numa node,memdev=mem \
      -chardev socket,id=vhost-user-scsi,path=vus.sock \
      -device vhost-user-scsi-pci,chardev=vhost-user-scsi \

The application is currently limited at one LUN only and it processes
requests synchronously (therefore only achieving QD1). The purpose of
the code is to show how a backend can be implemented and to test the
vhost-user-scsi Qemu implementation.

If a different instance of this vhost-user-scsi application is executed
at a remote host, a VM can be live migrated to such a host.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Message-Id: <1488479153-21203-5-git-send-email-felipe@nutanix.com>
2017-06-15 11:18:40 +02:00
Felipe Franciosi
f12c1ebddf vhost-user-scsi: Introduce vhost-user-scsi host device
This commit introduces a vhost-user device for SCSI. This is based
on the existing vhost-scsi implementation, but done over vhost-user
instead. It also uses a chardev to connect to the backend. Unlike
vhost-scsi (today), VMs using vhost-user-scsi can be live migrated.

To use it, start Qemu with a command line equivalent to:

qemu-system-x86_64 \
       -chardev socket,id=vus0,path=/tmp/vus.sock \
       -device vhost-user-scsi-pci,chardev=vus0,bus=pci.0,addr=...

A separate commit presents a sample application linked with libiscsi to
provide a backend for vhost-user-scsi.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Message-Id: <1488479153-21203-4-git-send-email-felipe@nutanix.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:40 +02:00
Paolo Bonzini
44cb280d33 qemu-doc: include version number
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:40 +02:00
Paolo Bonzini
d59157ea05 docs: create interop/ subdirectory
This is for the future interoperability & management guide.  It includes
the QAPI docs, including the automatically generated ones, other socket
protocols (vhost-user, VNC), and the qcow2 file format.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:39 +02:00
Thomas Huth
067b913619 include/exec/poison: Mark some CONFIG defines as poisoned, too
These are defined in config-target.h and thus should never be
used in common code.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1497468113-2874-3-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:39 +02:00
Thomas Huth
e947738e38 include/exec/poison: Add missing TARGET defines
Since we've got some new CPU targets in QEMU during the last months
and years, we've got some new TARGET_xxx defines now which should
be marked as poisoned for common code.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1497468113-2874-2-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:39 +02:00
Vladimir Sementsov-Ogievskiy
8c372a02e0 nbd/server: refactor nbd_trip
- do not use 'goto error_reply' outside a switch to jump into the
  middle of the switch's default case label
- reduce code duplication

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-13-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:39 +02:00
Vladimir Sementsov-Ogievskiy
2e5c9ad6f4 nbd/server: rename rc to ret
For consistency use 'ret' name for saving return code everywhere
in the file.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-12-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:39 +02:00
Vladimir Sementsov-Ogievskiy
d9faeed854 nbd/server: get rid of fail: return rc
"goto fail" error handling scheme is not needed for just returning
error code. It is better to return it immediately.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-11-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:39 +02:00
Vladimir Sementsov-Ogievskiy
7798d3aab9 nbd/server: nbd_negotiate: fix error path
Current code will return 0 on this nbd_write fail, as rc is 0
after successful nbd_negotiate_options. Fix this.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-10-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:39 +02:00
Vladimir Sementsov-Ogievskiy
c84087f2f5 nbd/server: remove NBDClientNewData
"co" field of NBDClientNewData has never been used, all the way back to
its declaration in commit 1a6245a5. So let's just use client pointer
instead of extra structure.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-9-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:18:32 +02:00
Vladimir Sementsov-Ogievskiy
ee898b870f nbd/server: refactor nbd_co_receive_request
Move function tail, about receiving next request out of the function.
Error path is simplified and nbd_co_receive_request becomes more
corresponding to its name.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-8-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Vladimir Sementsov-Ogievskiy
2a6e128bfa nbd/server: get rid of EAGAIN dead code
For now nbd_read never returns EAGAIN. So, don't handle it.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-7-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Vladimir Sementsov-Ogievskiy
572b97e722 nbd/server: refactor nbd_co_send_reply
As nbd_write never returns value > 0, we can get rid of extra ret.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-6-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Vladimir Sementsov-Ogievskiy
a0dc63a6b7 nbd/server: get rid of ssize_t
Now nbd_read and friends return int, so get rid of ssize_t.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-5-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Vladimir Sementsov-Ogievskiy
2b0bbc4f88 nbd/server: get rid of nbd_negotiate_read and friends
Functions nbd_negotiate_{read,write,drop_sync} were introduced in
1a6245a5b, when nbd_rwv (was nbd_wr_sync) was working through
qemu_co_sendv_recvv (the path is nbd_wr_sync -> qemu_co_{recv/send} ->
qemu_co_send_recv -> qemu_co_sendv_recvv), which just yields, without
setting any handlers. But starting from ff82911cd nbd_rwv (was
nbd_wr_syncv) works through qio_channel_yield() which sets handlers, so
watchers are redundant in nbd_negotiate_{read,write,drop_sync}, then,
let's just use nbd_{read,write,drop} functions.

Functions nbd_{read,write,drop} have an errp parameter, which is unused in
this patch. This will be fixed later.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170602150150.258222-4-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Vladimir Sementsov-Ogievskiy
44298024d3 nbd: make nbd_drop public
Following commit will reuse it for nbd server too.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170602150150.258222-3-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Vladimir Sementsov-Ogievskiy
d1fdf257d5 nbd: rename read_sync and friends
Rename
  nbd_wr_syncv -> nbd_rwv
  read_sync -> nbd_read
  read_sync_eof -> nbd_read_eof
  write_sync -> nbd_write
  drop_sync -> nbd_drop

1. nbd_ prefix
   read_sync and write_sync are already shared, so it is good to have a
   namespace prefix. drop_sync will be shared, and read_sync_eof is
   related to read_sync, so let's rename them all.

2. _sync suffix
   _sync is related to the fact that nbd_wr_syncv doesn't return if a
   write to socket returns EAGAIN. The first implementation of
   nbd_wr_syncv (was wr_sync in 7a5ca8648b) just loops while getting
   EAGAIN, the current implementation yields in this case.
   Why we want to get rid of it:
   - it is normal for r/w functions to be synchronous, so having an
     additional suffix for it looks redundant (contrariwise, we have
     _aio suffix for async functions)
   - _sync suffix in block layer is used when function does flush (so
     using it for other thing is confusing a bit)
   - keep function names short after adding nbd_ prefix

3. for nbd_wr_syncv let's use more common notation 'rw'

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170602150150.258222-2-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Yang Zhong
92229a57bb accel: move kvm related accelerator files into accel/
move kvm related accelerator files into accel/ subdirectory, also
create one stub subdirectory, which will include accelerator's stub
files.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <1496383606-18060-5-git-send-email-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Yang Zhong
244f144134 tcg: move tcg backend files into accel/tcg/
move tcg-runtime.c, translate-all.(ch) and translate-common.c into
accel/tcg/ subdirectory and updated related trace-events file.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <1496383606-18060-4-git-send-email-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Yang Zhong
d9bb58e510 tcg: move tcg related files into accel/tcg/ subdirectory
move cputlb.c, cpu-exec-common.c and cpu-exec.c related tcg exec
file into accel/tcg/ subdirectory.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <1496383606-18060-3-git-send-email-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:06 +02:00
Yang Zhong
a9ded6017e accel: split the tcg accelerator from accel.c file
there are some types of accelerators in qemu, and all accelerators
have their own file except tcg. tcg accelerator is also defined in
accel.c file. tcg accelerator file will be split from accel.c and
re-name to tcg-all.c. accel/ directory will be created to include
kvm and tcg related files.

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <1496383606-18060-2-git-send-email-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Max Reitz
041e32b8d9 qemu-nbd: Ignore SIGPIPE
qemu proper has done so for 13 years
(8a7ddc38a6), qemu-img and qemu-io have
done so for four years (526eda14a6).
Ignoring this signal is especially important in qemu-nbd because
otherwise a client can easily take down the qemu-nbd server by dropping
the connection when the server wants to send something, for example:

$ qemu-nbd -x foo -f raw -t null-co:// &
[1] 12726
$ qemu-io -c quit nbd://localhost/bar
can't open device nbd://localhost/bar: No export with name 'bar' available
[1]  + 12726 broken pipe  qemu-nbd -x foo -f raw -t null-co://

In this case, the client sends an NBD_OPT_ABORT and closes the
connection (because it is not required to wait for a reply), but the
server replies with an NBD_REP_ACK (because it is required to reply).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20170611123714.31292-1-mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Eric Blake
0c9390d978 nbd: Fix regression on resiliency to port scan
Back in qemu 2.5, qemu-nbd was immune to port probes (a transient
server would not quit, regardless of how many probe connections
came and went, until a connection actually negotiated).  But we
broke that in commit ee7d7aa when removing the return value to
nbd_client_new(), although that patch also introduced a bug causing
an assertion failure on a client that fails negotiation.  We then
made it worse during refactoring in commit 1a6245a (a segfault
before we could even assert); the (masked) assertion was cleaned
up in d3780c2 (still in 2.6), and just recently we finally fixed
the segfault ("nbd: Fully intialize client in case of failed
negotiation").  But that still means that ever since we added
TLS support to qemu-nbd, we have been vulnerable to an ill-timed
port-scan being able to cause a denial of service by taking down
qemu-nbd before a real client has a chance to connect.

Since negotiation is now handled asynchronously via coroutines,
we no longer have a synchronous point of return by re-adding a
return value to nbd_client_new().  So this patch instead wires
things up to pass the negotiation status through the close_fn
callback function.

Simple test across two terminals:
$ qemu-nbd -f raw -p 30001 file
$ nmap 127.0.0.1 -p 30001 && \
  qemu-io -c 'r 0 512' -f raw nbd://localhost:30001

Note that this patch does not change what constitutes successful
negotiation (thus, a client must enter transmission phase before
that client can be considered as a reason to terminate the server
when the connection ends).  Perhaps we may want to tweak things
in a later patch to also treat a client that uses NBD_OPT_ABORT
as being a 'successful' negotiation (the client correctly talked
the NBD protocol, and informed us it was not going to use our
export after all), but that's a discussion for another day.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1451614

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170608222617.20376-1-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
457e03559d hax-all: make async_safe_run_on_cpu safe on HAX too
While at it, drop the current_cpu assignment since this is a
per-thread variable on modern QEMU.

Cc: Vincent Palatin <vpalatin@chromium.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Roman Kagan
1d78a3c3ab kvm-all: make async_safe_run_on_cpu safe on kvm too
Wrap the bulk of kvm_cpu_exec with cpu_exec_start/end, so that kvm
version can also enjoy performing certain operations while all vCPUs are
quiescent.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Message-Id: <20170606181948.16238-15-rkagan@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Thomas Huth
a20fa79fa5 vl: Fix broken thread=xxx option of the --accel parameter
Commit bde4d9205 ("Fix the -accel parameter and the documentation for
'hax'") introduced a regression by adding a new local accel_opts
variable which shadows the variable with the same name that is
declared at the beginning of the main() scope. This causes the
qemu_tcg_configure() call later to be always called with NULL, so
that the thread=xxx option gets ignored. Fix it by removing the
local accel_opts variable and use "opts" instead, which is meant
for storing temporary QemuOpts values.
And while we're at it, also change the exit(1) here to exit(0)
since asking for help is not an error.

Fixes: bde4d9205e
Reported-by: Markus Armbruster <armbru@redhat.com>
Reported-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1496899257-25800-1-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Thomas Huth
428952cfa9 Makefile: Do not generate files if "configure" has not been run yet
When doing a "make -j10" in the vanilla QEMU source tree (without
running "configure" first), the Makefile currently generates two
files already, qemu-version.h and qemu-options.def. This should not
happen, so let's only build the generated files if config-host.mak
is available (i.e. "configure" has been run already).

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1496926799-13040-1-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
87e459a810 megasas: always store SCSIRequest* into MegasasCmd
This ensures that the request is unref'ed properly, and avoids a
segmentation fault in the new qtest testcase that is added.
This is CVE-2017-9503.

Reported-by: Zhangyanyu <zyy4013@stu.ouc.edu.cn>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
b356807fcd megasas: do not read SCSI req parameters more than once from frame
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
36c327a69d megasas: do not read command more than once from frame
Avoid TOC-TOU bugs by passing the frame_cmd down, and checking
cmd->dcmd_opcode instead of cmd->frame->header.frame_cmd.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
5104fac853 megasas: do not read DCMD opcode more than once from frame
Avoid TOC-TOU bugs by storing the DCMD opcode in the MegasasCmd

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
24c0c77af5 megasas: do not read iovec count more than once from frame
Avoid TOC-TOU bugs depending on how the compiler behaves.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
134550bf81 megasas: do not read sense length more than once from frame
Avoid TOC-TOU bugs depending on how the compiler behaves.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Paolo Bonzini
660174fc1b megasas: add qtest
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Marc-André Lureau
6b9911d0b6 memory: remove memory_region_set_fd
Now unnecessary since ivshmem uses memory_region_init_ram_from_fd.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170602141229.15326-7-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Marc-André Lureau
8381d89bec ivshmem: use ram_from_fd()
Instead of having its own mmap handling code, reuse the code from
exec.c.

Note: memory_region_init_ram_from_fd() adds some restrictions
(check for xen, kvm sync-mmu, etc) and changes (such as size
alignment). This may actually be more correct.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170602141229.15326-6-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Marc-André Lureau
fea617c58b Add memory_region_init_ram_from_fd()
Add a new function to initialize a RAM memory region with a file
descriptor to be mmap-ed.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170602141229.15326-5-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:05 +02:00
Marc-André Lureau
38b3362dd1 exec: split qemu_ram_alloc_from_file()
Add qemu_ram_alloc_from_fd(), which can be used to allocate ramblock from
fd only.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170602141229.15326-4-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:04 +02:00
Marc-André Lureau
8d37b030fe exec: split file_ram_alloc()
Move file opening part in a separate function, file_ram_open(). This
allows for reuse of file_ram_alloc() with a given fd.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170602141229.15326-3-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:04 +02:00
Marc-André Lureau
e45e7ae281 exec: check kvm mmu notifiers earlier
Move kvm mmu notifiers check before calling file_ram_alloc(), with the
other xen precondition. (file_ram_alloc() will be reused in other cases
than -mem-path).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170602141229.15326-2-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-15 11:04:04 +02:00
Peter Maydell
73aa4692ec Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170614' into staging
migration/next for 20170614

# gpg: Signature made Wed 14 Jun 2017 11:16:21 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170614:
  migration: Don't create decompression threads if not enabled
  migration: Test for disabled features on reception
  migration: Remove unneeded includes
  migration: fix incorrect enable return path
  migration: Fix compilation with older compilers

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-15 09:43:12 +01:00
Laurent Vivier
69e698220f target-m68k: define ext_opsize
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170611231633.32582-4-laurent@vivier.eu>
2017-06-15 09:16:38 +02:00
Laurent Vivier
c88f8107b1 target-m68k: move FPU helpers to fpu_helper.c
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170611231633.32582-3-laurent@vivier.eu>
2017-06-15 09:16:16 +02:00
Laurent Vivier
e5b0cbe8e8 softfloat: define 680x0 specific values
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170611231633.32582-2-laurent@vivier.eu>
2017-06-15 09:15:38 +02:00
Laurent Vivier
043b936ef6 target/m68k: fix V flag for CC_OP_SUBx
V flag for subtraction is:

   v = (res ^ src1) & (src1 ^ src2)

(see COMPUTE_CCR() in target/m68k/helper.c)

But gen_flush_flags() uses:

   v = (res ^ src2) & (src1 ^ src2)

The problem has been found with the following program:

        .global _start
_start:
        move.l  #-2147483648,%d0
        subq.l  #1,%d0
        jvc     1f
        move.l #1,%d1
        move.l #1,%d0
        trap #0
1:
        move.l #0,%d1
        move.l #1,%d0
        trap #0

It works fine (exit(1)) on real hardware, and with "-singlestep".

"-singlestep" uses gen_helper_flush_flags(), whereas
without "-singlestep", V flag is computed directly in
gen_flush_flags().

This patch updates gen_flush_flags() to have the same result
as with gen_helper_flush_flags().

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170614203905.19657-1-laurent@vivier.eu>
2017-06-15 08:50:30 +02:00
Jeff Cody
5c3ad1a6a8 block/iscsi: enable filename option and parsing
When enabling option parsing and blockdev-add for iscsi, we removed the
'filename' option.  Unfortunately, this was a bit optimistic, as
previous versions of QEMU allowed the use of the option in backing
filenames via json.  This means that without parsing this option, we
cannot open existing images that used to work fine.

See bug: https://bugzilla.redhat.com/show_bug.cgi?id=1457088

Tested-by: Richard W.M. Jones <rjones@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: 0789ab6c32814ab4b6896707d378804bd4424c65.1497444637.git.jcody@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-06-14 17:39:46 -04:00
Jeff Cody
91589d9e5c block/rbd: enable filename option and parsing
When enabling option parsing and blockdev-add for rbd, we removed the
'filename' option.  Unfortunately, this was a bit optimistic, as
previous versions of QEMU allowed the use of the option in backing
filenames via json.  This means that without parsing this option, we
cannot open existing images that used to work fine.

See bug: https://bugzilla.redhat.com/show_bug.cgi?id=1457088

Tested-by: Richard W.M. Jones <rjones@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: 937dc9fde348d13311eb8e23444df3bc3190b612.1497444637.git.jcody@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-06-14 17:39:46 -04:00
Juan Quintela
3416ab5bb4 migration: Don't create decompression threads if not enabled
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>

--

I removed the [HACK] part because previous patch just check that
compression pages are not received.
2017-06-14 11:11:06 +02:00
Juan Quintela
edc60127e4 migration: Test for disabled features on reception
Right now, if we receive a compressed page while these features are
disabled, Bad Things (TM) can happen.  Just add a test for them.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>

--

I had XBZRLE here also, but it doesn't need extra resources on
destination, only on source.  Additionally libvirt doesn't enable it on
destination, so don't put it here.

- initialize invalid_flags at declaration time.
- remove extra space (peter)
2017-06-14 11:11:06 +02:00
Juan Quintela
1adc1ceef7 migration: Remove unneeded includes
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-06-14 11:10:19 +02:00
Peter Xu
62a0265852 migration: fix incorrect enable return path
0425dc9 is actually v1 of that patch, but it was accidentally
merged (while there was a v2). That will cause problems when we try to
migrate to some old QEMUs when return path is not really there. Let's
fix it, then squashing this patch with 0425dc9 will be exactly patch
content of v2.

Fixes: 0425dc9 ("migration: isolate return path on src")
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-14 11:09:38 +02:00
Juan Quintela
68a4a2fda1 migration: Fix compilation with older compilers
Those typedefs are needed in both files.  New compilers (F25 where I
work) don't complain about repeating a typedef.  But older ones
complain.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-06-14 11:08:55 +02:00
Gerd Hoffmann
fe5c44f9c9 spice: don't enter opengl mode in case another UI provides opengl support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170606110618.10393-1-kraxel@redhat.com
2017-06-14 09:52:35 +02:00
Gerd Hoffmann
8f4ea9cd0b sdl: prefer sdl2 over sdl1
In case the configure script finds both SDL 1.2 and SDL 2.x installed
it still prefers SDL 1.2.  Prefer SDL 2.x instead.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170606105339.3613-3-kraxel@redhat.com
2017-06-14 09:51:45 +02:00
Gerd Hoffmann
5fe309ff0d gtk: prefer gtk3 over gtk2
In case the configure script finds both gtk2 and gtk3 installed it
still prefers gtk2 over gtk3.  Prefer gtk3 instead.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170606105339.3613-2-kraxel@redhat.com
2017-06-14 09:51:45 +02:00
Jonathon Jongsma
bfefa6d7d6 spice: Use proper enum type for kbd led state
Although the Qemu and spice flags currently have the same value, it
seems more correct to pass the spice flag values to
spice_server_kbd_leds(), especially considering that this function
already makes an effort to convert between the QEMU_*_LED and
SPICE_KEYBOARD_MODIFIER_* values.

Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170510202006.31737-1-jjongsma@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-14 09:51:45 +02:00
Ian McKellar via Qemu-devel
af8862b2a2 Improve Cocoa modifier key handling
I had two problems with QEMU on macOS:
 1) Sometimes when alt-tabbing to QEMU it would act as if the 'a' key
    was pressed so I'd get 'aaaaaaaaa....'.
 2) Using Sikuli to programmatically send keys to the QEMU window text
    like "foo_bar" would come out as "fooa-bar".

They looked similar and after much digging the problem turned out to be
the same. When QEMU's ui/cocoa.m received an NSFlagsChanged NSEvent it
looked at the keyCode to determine what modifier key changed. This
usually works fine but sometimes the keyCode is 0 and the app should
instead be looking at the modifierFlags bitmask. Key code 0 is the 'a'
key.

I added code that handles keyCode == 0 differently. It checks the
modifierFlags and if they differ from QEMU's idea of which modifier
keys are currently pressed it toggles those changed keys.

This fixes my problems and seems to work fine.

Signed-off-by: Ian McKellar <ianloic@google.com>
Message-id: 20170526233816.47627-1-ianloic@google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-06-14 09:51:45 +02:00
David Hildenbrand
076d4d39b6 s390x/cpumodel: wire up cpu type + id for TCG
Let's properly expose the CPU type (machine-type number) via "STORE CPU
ID" and "STORE SUBSYSTEM INFORMATION".

As TCG emulates basic mode, the CPU identification number has the format
"Annnnn", whereby A is the CPU address, and n are parts of the CPU serial
number (0 for us for now).

A specification exception will be injected if the address is not aligned
to a double word. Low address protection will not be checked as
we're missing some more general support for that.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170609133426.11447-3-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-13 11:09:39 -07:00
David Hildenbrand
becf8217de target/s390x: rework PGM interrupt psw.addr handling
We can tell from the program interrupt code, whether a program interrupt
has to forward the address in the PGM new PSW
(suppressing/terminated/completed) to point at the next instruction, or
if it is nullifying and the PSW address does not have to be incremented.

So let's not modify the PSW address outside of the injection path and
handle this internally. We just have to handle instruction length
auto detection if no valid instruction length can be provided.

This should fix various program interrupt injection paths, where the
PSW was not properly forwarded.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170609142156.18767-3-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-13 11:09:39 -07:00
David Hildenbrand
465aec4617 target/s390x: correctly indicate PER nullification
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170609142156.18767-2-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-13 10:53:07 -07:00
Peter Maydell
3f0602927b Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170613' into staging
target-arm queue:
 * vITS: Support save/restore
 * timer/aspeed: Fix timer enablement when reload is not set
 * aspeed: add temperature sensor device
 * timer.h: Provide better monotonic time on ARM hosts
 * exynos4210: various cleanups
 * exynos4210: support system poweroff

# gpg: Signature made Tue 13 Jun 2017 15:05:49 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170613:
  hw/intc/arm_gicv3_its: Allow save/restore
  hw/intc/arm_gicv3_kvm: Implement pending table save
  hw/intc/arm_gicv3_its: Implement state save/restore
  kvm-all: Pass an error object to kvm_device_access
  timer/aspeed: fix timer enablement when a reload is not set
  aspeed: add a temp sensor device on I2C bus 3
  hw/misc: add a TMP42{1, 2, 3} device model
  timer.h: Provide better monotonic time
  hw/misc/exynos4210_pmu: Add support for system poweroff
  hw/intc/exynos4210_gic: Constify array of combiner interrupts
  hw/arm/exynos: Use type define instead of hard-coded a9mpcore_priv string
  hw/arm/exynos: Declare local variables in some order
  hw/arm/exynos: Move DRAM initialization next boards
  hw/timer/exynos4210_mct: Remove unused defines
  hw/timer/exynos4210_mct: Cleanup indentation and empty new lines
  hw/timer/exynos4210_mct: Fix checkpatch style errors
  hw/intc/exynos4210_gic: Use more meaningful name for local variable

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 15:49:07 +01:00
Eric Auger
252a7a6a96 hw/intc/arm_gicv3_its: Allow save/restore
We change the restoration priority of both the GICv3 and ITS. The
GICv3 must be restored before the ITS and the ITS needs to be restored
before PCIe devices since it translates their MSI transactions.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-id: 1497023553-18411-5-git-send-email-eric.auger@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:57:01 +01:00
Eric Auger
d5aa0c229a hw/intc/arm_gicv3_kvm: Implement pending table save
This patch adds the flush of the LPI pending bits into the
redistributor pending tables. This happens on VM stop.

There is no explicit restore as the tables are implicitly sync'ed
on ITS table restore and on LPI enable at redistributor level.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1497023553-18411-4-git-send-email-eric.auger@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:57:00 +01:00
Eric Auger
cddafd8f35 hw/intc/arm_gicv3_its: Implement state save/restore
We need to handle both registers and ITS tables. While
register handling is standard, ITS table handling is more
challenging since the kernel API is devised so that the
tables are flushed into guest RAM and not in vmstate buffers.

Flushing the ITS tables on device pre_save() is too late
since the guest RAM is already saved at this point.

Table flushing needs to happen when we are sure the vcpus
are stopped and before the last dirty page saving. The
right point is RUN_STATE_FINISH_MIGRATE but sometimes the
VM gets stopped before migration launch so let's simply
flush the tables each time the VM gets stopped.

For regular ITS registers we just can use vmstate pre_save()
and post_load() callbacks.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1497023553-18411-3-git-send-email-eric.auger@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:57:00 +01:00
Eric Auger
556969e938 kvm-all: Pass an error object to kvm_device_access
In some circumstances, we don't want to abort if the
kvm_device_access fails. This will be the case during ITS
migration, in case the ITS table save/restore fails because
the guest did not program the vITS correctly. So let's pass an
error object to the function and return the ioctl value. New
callers will be able to make a decision upon this returned
value.

Existing callers pass &error_abort which will cause the
function to abort on failure.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-id: 1497023553-18411-2-git-send-email-eric.auger@redhat.com
[PMM: wrapped long line]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:57:00 +01:00
Cédric Le Goater
1403f36447 timer/aspeed: fix timer enablement when a reload is not set
When a timer is enabled before a reload value is set, the controller
waits for a reload value to be set before starting decrementing. This
fix tries to cover that case by changing the timer expiry only when
a reload value is valid.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Message-id: 1496739312-32304-1-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:57:00 +01:00
Cédric Le Goater
a87e81b9b5 aspeed: add a temp sensor device on I2C bus 3
Temperatures can be changed from the monitor with :

	(qemu) qom-set /machine/unattached/device[2] temperature0 12000

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1496739230-32109-3-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:59 +01:00
Cédric Le Goater
fe3874b6a1 hw/misc: add a TMP42{1, 2, 3} device model
Largely inspired by the TMP105 temperature sensor, here is a model for
the TMP42{1,2,3} temperature sensors.

Specs can be found here :

	http://www.ti.com/lit/gpn/tmp421

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1496739230-32109-2-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:59 +01:00
Pranith Kumar
d1bb099f63 timer.h: Provide better monotonic time
Tested and confirmed that the stretch i386 debian qcow2 image on a
raspberry pi 2 works.

Fixes: LP#: 893208 <https://bugs.launchpad.net/qemu/+bug/893208/>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170418191817.10430-1-bobby.prani@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:59 +01:00
Krzysztof Kozlowski
a14f9b8292 hw/misc/exynos4210_pmu: Add support for system poweroff
On all Exynos-based boards, the system powers down itself by driving
PS_HOLD signal low - eight bit in PS_HOLD_CONTROL register of PMU.
Handle writing to respective PMU register to fix power off failure:

    reboot: Power down
    Unable to poweroff system
    shutdown: 31 output lines suppressed due to ratelimiting
    Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000000

    CPU: 0 PID: 1 Comm: shutdown Not tainted 4.11.0-rc8 #846
    Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
    [<c031050c>] (unwind_backtrace) from [<c030ba6c>] (show_stack+0x10/0x14)
    [<c030ba6c>] (show_stack) from [<c05b2800>] (dump_stack+0x88/0x9c)
    [<c05b2800>] (dump_stack) from [<c03d3140>] (panic+0xdc/0x268)
    [<c03d3140>] (panic) from [<c0343614>] (do_exit+0xa90/0xab4)
    [<c0343614>] (do_exit) from [<c035f2dc>] (SyS_reboot+0x164/0x1d0)
    [<c035f2dc>] (SyS_reboot) from [<c0307c80>] (ret_fast_syscall+0x0/0x3c)

Additionally the initial value of PS_HOLD has to be changed because
recent Linux kernel (v4.12-rc1) uses regmap cache for this access.
When the register is kept at reset value, the kernel will not issue a
write to it.  Usually the bootloader sets the eight bit of PS_HOLD high
so mimic its existence here.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:58 +01:00
Krzysztof Kozlowski
5f7f22ffe1 hw/intc/exynos4210_gic: Constify array of combiner interrupts
The static array of interrupt combiner mappings is not modified so it
can be made const for code safeness.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:58 +01:00
Krzysztof Kozlowski
9e883790dd hw/arm/exynos: Use type define instead of hard-coded a9mpcore_priv string
Use a define for a9mpcore_priv device type name instead of hard-coded
string.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:58 +01:00
Krzysztof Kozlowski
310150c000 hw/arm/exynos: Declare local variables in some order
Bring some more readability by declaring local function variables: first
initialized ones and then the rest (with reversed-christmas-tree order).

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:57 +01:00
Krzysztof Kozlowski
a2f2f6249b hw/arm/exynos: Move DRAM initialization next boards
Before QOM-ifying the Exynos4 SoC model, move the DRAM initialization
from exynos4210.c to exynos4_boards.c because DRAM is board specific,
not SoC.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:57 +01:00
Krzysztof Kozlowski
986924f875 hw/timer/exynos4210_mct: Remove unused defines
Remove defines not used anywhere.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:57 +01:00
Krzysztof Kozlowski
54ab9927d1 hw/timer/exynos4210_mct: Cleanup indentation and empty new lines
Statements under 'case' were in some places wrongly indented bringing
confusion and making the code less readable.  Remove also few unneeded
blank lines.  No functional changes.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:57 +01:00
Krzysztof Kozlowski
92e5d7e222 hw/timer/exynos4210_mct: Fix checkpatch style errors
Fix checkpatch errors:
1. ERROR: spaces required around that '+' (ctx:VxV)
2. ERROR: spaces required around that '&' (ctx:VxV)

No functional changes.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:56 +01:00
Krzysztof Kozlowski
ee78356eba hw/intc/exynos4210_gic: Use more meaningful name for local variable
There are two SysBusDevice variables in exynos4210_gic_realize()
function: one for the device itself and second for arm_gic device.  Add
a prefix "gic" to the second one so it will be easier to understand the
code.

While at it, put local uninitialized 'i' variable at the end, next to
other uninitialized ones.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:56:56 +01:00
Peter Maydell
6f153ceb9b Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Tue 13 Jun 2017 14:35:25 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  monitor: resurrect handle_qmp_command trace event
  monitor: add handle_hmp_command trace event

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 14:40:17 +01:00
Stefan Hajnoczi
b097efc002 monitor: resurrect handle_qmp_command trace event
Commit 104fc30279 ("qmp: Drop duplicated
QMP command object checks") removed the call to
trace_handle_qmp_command() while eliminating code duplication.

This patch brings the trace event back so QEMU-internal trace events can
be correlated with the QMP commands that caused them.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170605104216.22429-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-13 14:35:11 +01:00
Stefan Hajnoczi
79cad8b46b monitor: add handle_hmp_command trace event
It is often useful to correlate QEMU-internal events with monitor
commands that caused them.  Trace the full HMP command being executed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170605104216.22429-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-13 14:35:10 +01:00
Peter Maydell
735286a4f8 Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170613' into staging
migration/next for 20170613

# gpg: Signature made Tue 13 Jun 2017 10:01:45 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170613:
  migration: Move migration.h to migration/
  migration: Move remaining exported functions to migration/misc.h
  migration: create global_state.c
  migration: ram_control_* are implemented in qemu_file
  migration: Commands are only used inside migration.c
  migration: Move constants to savevm.h
  migration: Move dump_vmsate_json_to_file() to misc.h
  migration: Split registration functions from vmstate.h
  migration: Move self_announce_delay() to misc.h
  migration: Remove MigrationState from migration_channel_incomming()
  ram: Now POSTCOPY_ACTIVE is the same that STATUS_ACTIVE
  ram: Print block stats also in the complete case
  migration: Don't try to set *errp directly
  migration: isolate return path on src

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 13:51:29 +01:00
Peter Maydell
e0b4891ae6 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Fri 09 Jun 2017 13:41:59 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  block/gluster.c: Handle qdict_array_entries() failure

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 12:55:47 +01:00
Peter Maydell
9746211baa Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170609' into staging
ppc patch queue 2017-06-09

This batch contains more patches to rework the pseries machine hotplug
infrastructure, plus an assorted batch of bugfixes.

It contains a start on fixes to restore migration from older machine
types on older versions which was broken by some xics changes.  There
are still a few missing pieces here, though.

# gpg: Signature made Fri 09 Jun 2017 06:26:03 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.10-20170609:
  Revert "spapr: fix memory hot-unplugging"
  xics: drop ICPStateClass::cpu_setup() handler
  xics: setup cpu at realize time
  xics: pass appropriate types to realize() handlers.
  xics: introduce macros for ICP/ICS link properties
  hw/cpu: core.c can be compiled as common object
  hw/ppc/spapr: Adjust firmware name for PCI bridges
  xics: add reset() handler to ICPStateClass
  pnv_core: drop reference on ICPState object during CPU realization
  spapr: Rework DRC name handling
  spapr: Fold spapr_phb_{add,remove}_pci_device() into their only callers
  spapr: Change DRC attach & detach methods to functions
  spapr: Clean up handling of DR-indicator
  spapr: Clean up RTAS set-indicator
  spapr: Don't misuse DR-indicator in spapr_recover_pending_dimm_state()
  spapr: Clean up DR entity sense handling
  pseries: Correct panic behaviour for pseries machine type
  spapr: fix memory leak in spapr_memory_pre_plug()
  target/ppc: fix memory leak in kvmppc_is_mem_backend_page_size_ok()
  target/ppc: pass const string to kvmppc_is_mem_backend_page_size_ok()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 11:56:00 +01:00
Gerd Hoffmann
ad3c5412f2 ehci: stop recursive calls to ehci_work_bh
Can happen with usb-storage devices: ehci_work_bh calls usb-storage,
usb-storage calls into block layer, block layer may run BHs.

Add a simple bool and just do nothing in case we figure ehci_work_bh is
active.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170612073109.25930-1-kraxel@redhat.com
2017-06-13 12:17:33 +02:00
Peter Maydell
8e3cf49c47 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, vhost: fixes

Some fixes all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 08 Jun 2017 20:04:24 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  hw/pcie: fix the generic pcie root port to support migration
  nvdimm acpi: fix region format interface code
  vhost-user-bridge: fix iov_restore_front() warning

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 11:14:07 +01:00
Juan Quintela
6666c96aac migration: Move migration.h to migration/
Nothing uses it outside of migration.h

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:45 +02:00
Juan Quintela
c4b63b7cc5 migration: Move remaining exported functions to migration/misc.h
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-06-13 11:00:45 +02:00
Juan Quintela
84a899de8c migration: create global_state.c
It doesn't belong anywhere else, just the global state where everybody
can stick other things.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:45 +02:00
Juan Quintela
2ce3bf1aa9 migration: ram_control_* are implemented in qemu_file
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:45 +02:00
Juan Quintela
da6f17903f migration: Commands are only used inside migration.c
So, move them there.  Notice that we export functions that send
commands, not the command themselves.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:45 +02:00
Juan Quintela
c3d2e2e76c migration: Move constants to savevm.h
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:45 +02:00
Juan Quintela
b7722747e4 migration: Move dump_vmsate_json_to_file() to misc.h
It was not from vmstate.c to start with.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:45 +02:00
Juan Quintela
f2a8f0a631 migration: Split registration functions from vmstate.h
They are independent, and nowadays almost every device registers things
with qdev->vmsd.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-06-13 11:00:44 +02:00
Juan Quintela
f8d806c992 migration: Move self_announce_delay() to misc.h
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:44 +02:00
Juan Quintela
543147116e migration: Remove MigrationState from migration_channel_incomming()
All callers were calling migrate_get_current(), so do it inside the function.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:44 +02:00
Juan Quintela
c8f9f4f402 ram: Now POSTCOPY_ACTIVE is the same that STATUS_ACTIVE
Merge them.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-06-13 11:00:44 +02:00
Juan Quintela
930ac04c22 ram: Print block stats also in the complete case
Once there, create populate_disk_info.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>

--

- create populate_disk_info instead of "abusing" populate_ram_info
2017-06-13 11:00:44 +02:00
Eduardo Habkost
250561e1ae migration: Don't try to set *errp directly
Assigning directly to *errp is not valid, as errp may be NULL,
&error_fatal, or &error_abort.  Use error_propagate() instead.

Cc: Juan Quintela <quintela@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-13 11:00:44 +02:00
Peter Xu
0425dc9762 migration: isolate return path on src
There are some places that bound "return path" with postcopy. Let's be
prepared for its usage even without postcopy. This patch mainly did this
on source side.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-13 11:00:44 +02:00
Peter Maydell
f4f3082b0c Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20170608' into staging
s390x: misc fixes

bunch of fixes
- reject MIDA accesses for CCWs
- cpumodel fixes
- cross-build fix for bios
- migration improvements

# gpg: Signature made Thu 08 Jun 2017 14:10:29 BST
# gpg:                using RSA key 0x117BBC80B5A61C7C
# gpg: Good signature from "Christian Borntraeger (IBM) <borntraeger@de.ibm.com>"
# Primary key fingerprint: F922 9381 A334 08F9 DBAB  FBCA 117B BC80 B5A6 1C7C

* remotes/borntraeger/tags/s390x-20170608:
  s390x/cpumodel: improve defintion search without an IBC
  s390x/cpumodel: take care of the cpuid format bit for KVM
  pc-bios/s390-ccw: use STRIP variable in Makefile
  s390x/css: fence off MIDA
  s390x/css: catch section mismatch on load

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-13 09:27:17 +01:00
Peter Maydell
9bba618f18 Merge remote-tracking branch 'remotes/elmarco/tags/char-pull-request' into staging
# gpg: Signature made Thu 08 Jun 2017 15:12:11 BST
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/char-pull-request:
  test-char: start a /char/serial test
  chardev: don't use alias names in parse_compat()
  char: fix alias devices regression

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-12 19:26:49 +01:00
Gerd Hoffmann
d54fddea98 xhci: only update dequeue ptr on completed transfers
The dequeue pointer should only be updated in case the transfer
is actually completed.  If we update it for inflight transfers
we will not pick them up again after migration, which easily
triggers with HID devices as they typically have a pending
transfer, waiting for user input to happen.

Fixes: 243afe858b
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1451631
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 20170608074122.32099-1-kraxel@redhat.com
2017-06-12 16:14:04 +02:00
Peter Maydell
5093f028ce Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Wed 07 Jun 2017 19:55:32 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  simpletrace: Improve the error message if event is not declared

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-12 14:51:30 +01:00
Peter Maydell
2a8469aaab Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Wed 07 Jun 2017 19:06:51 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  configure: split c and cxx extra flags
  coroutine-lock: do not touch coroutine after another one has been entered
  .gdbinit: load QEMU sub-commands when gdb starts
  coccinelle: fix typo in comment
  oslib: strip trailing '\n' from error_setg() string argument

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-12 14:14:42 +01:00
Peter Maydell
475df9d809 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Fri 09 Jun 2017 12:47:31 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  block: fix external snapshot abort permission error
  block/qcow.c: Fix memory leak in qcow_create()
  qemu-iotests: Test automatic commit job cancel on hot unplug
  commit: Fix use after free in completion
  qemu-iotests: Block migration test
  migration/block: Clean up BBs in block_save_complete()
  migration: Inactivate images after .save_live_complete_precopy()
  block: Fix anonymous BBs in blk_root_inactivate()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-12 10:43:32 +01:00
Peter Maydell
56faeb9bb6 block/gluster.c: Handle qdict_array_entries() failure
In qemu_gluster_parse_json(), the call to qdict_array_entries()
could return a negative error code, which we were ignoring
because we assigned the result to an unsigned variable.
Fix this by using the 'int' type instead, which matches the
return type of qdict_array_entries() and also the type
we use for the loop enumeration variable 'i'.

(Spotted by Coverity, CID 1360960.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1496682098-1540-1-git-send-email-peter.maydell@linaro.org
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-06-09 08:41:29 -04:00
Jeff Cody
719fc28c80 block: fix external snapshot abort permission error
In external_snapshot_abort(), we try to undo what was done in
external_snapshot_prepare() calling bdrv_replace_node() to swap the
nodes back.  However, we receive a permissions error as writers are
blocked on the old node, which is now the new node backing file.

An easy fix (initially suggested by Kevin Wolf) is to call
bdrv_set_backing_hd() on the new node, to set the backing node to NULL.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-09 13:46:20 +02:00
Peter Maydell
272545cf21 block/qcow.c: Fix memory leak in qcow_create()
Coverity points out that the code path in qcow_create() for
the magic "fat:" backing file name leaks the memory used to
store the filename (CID 1307771). Free the memory before
we overwrite the pointer.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-06-09 13:46:20 +02:00
Kevin Wolf
c3971b883a qemu-iotests: Test automatic commit job cancel on hot unplug
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-06-09 13:46:20 +02:00
Kevin Wolf
19ebd13ed4 commit: Fix use after free in completion
The final bdrv_set_backing_hd() could be working on already freed nodes
because the commit job drops its references (through BlockBackends) to
both overlay_bs and top already a bit earlier.

One way to trigger the bug is hot unplugging a disk for which
blockdev_mark_auto_del() cancels the block job.

Fix this by taking BDS-level references while we're still using the
nodes.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-06-09 13:46:13 +02:00
Kevin Wolf
49695eeb74 qemu-iotests: Block migration test
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-06-09 11:45:03 +02:00
Kevin Wolf
362fdf170c migration/block: Clean up BBs in block_save_complete()
We need to release any block migrations BlockBackends on the source
before successfully completing the migration because otherwise
inactivating the images will fail (inactivation only tolerates device
BBs).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
2017-06-09 11:45:03 +02:00
Kevin Wolf
f07fa4cbf0 migration: Inactivate images after .save_live_complete_precopy()
Block migration may still access the image during its
.save_live_complete_precopy() implementation, so we should only
inactivate the image afterwards.

Another reason for the change is that inactivating an image fails when
there is still a non-device BlockBackend using it, which includes the
BBs used by block migration. We want to give block migration a chance to
release the BBs before trying to inactivate the image (this will be done
in another patch).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
2017-06-09 11:45:03 +02:00
Kevin Wolf
93c26503e0 block: Fix anonymous BBs in blk_root_inactivate()
blk->name isn't an array, but a pointer that can be NULL. Checking for
an anonymous BB must involve a NULL check first, otherwise we get
crashes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
2017-06-09 11:45:03 +02:00
Laurent Vivier
593080936a Revert "spapr: fix memory hot-unplugging"
This reverts commit fe6824d126.

Conflicts hw/ppc/spapr_drc.c, because get_index() has been renamed
spapr_get_index().

This didn't fix the problem. Once the hotplug has been started
some memory is allocated and some structures are allocated.
We don't free it when we ignore the unplug, and we can't because
they can be in use by the kernel.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-09 12:35:46 +10:00
Greg Kurz
b1fd36c363 xics: drop ICPStateClass::cpu_setup() handler
The cpu_setup() handler is only implemented by xics_kvm, where it really
does a typical "realize" job. Moreover, the realize() handler is called
shortly after cpu_setup(), on the same path.

This patch converts xics_kvm to implement realize() instead of cpu_setup().

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-09 12:17:59 +10:00
Greg Kurz
9ed656631d xics: setup cpu at realize time
Until recently, spapr used to allocate ICPState objects for the lifetime
of the machine. They would only be associated to vCPUs in xics_cpu_setup()
when plugging a CPU core.

Now that ICPState objects have the same lifecycle as vCPUs, it is
possible to associate them during realization.

This patch hence open-codes xics_cpu_setup() in icp_realize(). The vCPU
is passed as a property. Note that vCPU now needs to be realized first
for the IRQs to be allocated. It also needs to be reset before ICPState
realization in order to synchronize with KVM.

Since ICPState objects are freed when unrealized, xics_cpu_destroy() isn't
needed anymore and can be safely dropped.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-09 12:15:57 +10:00
Greg Kurz
100f738850 xics: pass appropriate types to realize() handlers.
It makes more sense to pass an ICPState * to handlers of ICPStateClass
instead of a DeviceState *, if only to benefit from compile time type
checking. The same goes with ICSStateClass.

While here, we also change the declaration of ICPStateClass in xics.h
for consistency.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-09 12:12:34 +10:00
Greg Kurz
ad265631c0 xics: introduce macros for ICP/ICS link properties
These properties are part of the XICS API. They deserve to appear
explicitly in the XICS header file.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-09 12:12:34 +10:00
Thomas Huth
3b95410507 hw/cpu: core.c can be compiled as common object
There does not seem to be any target specific code in core.c, so we can
put it into "common-obj" instead of "obj" to compile it only once for
all targets.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-09 12:02:55 +10:00
Marcel Apfelbaum
bc277a52fb hw/pcie: fix the generic pcie root port to support migration
Add msix state to pcie-root-ports's vmstate
in order to support migration.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-08 22:02:37 +03:00
Haozhong Zhang
20fdef58a0 nvdimm acpi: fix region format interface code
Per ACPI 6.2, section 5.2.25.6 and JEDEC Annex L Release 3, the
current region format interface code 0x201 indicates the block
addressed function interface 1, rather than a byte addressable
interface. Fix it by using 0x301 which indicates the byte addressable
no energy backed function interface 1.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-08 22:02:36 +03:00
Marc-André Lureau
277238f9f4 vhost-user-bridge: fix iov_restore_front() warning
CC      tests/vhost-user-bridge.o
/home/dgilbert/git/qemu-world3/tests/vhost-user-bridge.c:228:23: warning: variables 'front' and 'iov' used in loop condition not modified in loop body [-Wfor-loop-analysis]
    for (cur = front; front != iov; cur++) {
                      ^~~~~    ~~~
1 warning generated.

Fix the loop, document the function, and fix some related assert().

In practice, the loop bug was harmless because the front sg buffer is
enough to discard/restore the header size.

Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Tested-by: Jens Freimann <jfreiman@redhat.com>
2017-06-08 22:02:36 +03:00
Marc-André Lureau
27d4c3789d test-char: start a /char/serial test
Quite limited test, to check that the chardev can be created with a
path and with the tty alias.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-06-08 17:58:13 +04:00
Marc-André Lureau
73119c2864 chardev: don't use alias names in parse_compat()
"parport" is considered "old" since commit 88a946d32d, when "parallel"
was added. Similarly for "tty" in commit d59044ef74.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-06-08 17:57:58 +04:00
Marc-André Lureau
d203c64398 char: fix alias devices regression
Fix regression from commit 4d43a603c7, where the serial and parallel
headers got removed from char.c, which broke the alias table.

Move the HAVE_CHARDEV_SERIAL/HAVE_CHARDEV_PARPORT to osdep.h instead
of being in separate headers.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-06-08 17:57:36 +04:00
Thomas Huth
4871dd4c3f hw/ppc/spapr: Adjust firmware name for PCI bridges
SLOF uses "pci" as name for PCI bridge nodes in the device tree instead
of "pci-bridges", so booting via bootindex from a device behind a PCI
bridge currently does not work since QEMU passes the wrong name in the
"qemu,boot-list" property. Fix it by changing the name of the PCI bridge
nodes to "pci" instead.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1459170
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-08 14:38:27 +10:00
Greg Kurz
a4d4edce7a xics: add reset() handler to ICPStateClass
Taking into account that qemu_set_irq() returns immediately if its first
argument is NULL, icp_kvm_reset() largely duplicates icp_reset().

This patch introduces a reset() handler, so that the common logic can
be implemented in icp_reset() only.

While there we can also drop icp_kvm_realize() and icp_kvm_unrealize(). This
causes icp-kvm to be realized in icp_realize(), which sets icp->xics, but
it has no impact.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-08 14:38:27 +10:00
Greg Kurz
67b544d65f pnv_core: drop reference on ICPState object during CPU realization
Similarly to what was done to spapr with commit 249127d0df, this patch
ensures that we don't keep an extra reference on the ICPState object. Also
since the object was just created and not reparented yet, the call to
object_property_add_child() should never fail: let's pass &error_abort to
make this clear.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-08 14:38:27 +10:00
David Gibson
7980833619 spapr: Rework DRC name handling
DRC objects have a get_name method which returns the DRC name generated
when the DRC is created.  Replace that with a fixed spapr_drc_name()
function which generates the name on the fly from other information.  This
means:
  * We get rid of a method with only one implementation, and only local
    callers
  * We don't have to carry the name string around for the lifetime of the
    DRC
  * We use information added to the class structure to generate the name
    in standard format, so we don't need an explicit switch on drc type
    any more

We also eliminate the 'name' property; it's basically useless since the
only information in it can easily be deduced from other things.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-08 14:38:27 +10:00
David Gibson
6304fd27ef spapr: Fold spapr_phb_{add,remove}_pci_device() into their only callers
Both functions are fairly short, and so are their callers.  There's no
particular logical distinction between them, so fold them together.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-08 14:38:27 +10:00
David Gibson
0be4e88621 spapr: Change DRC attach & detach methods to functions
DRC objects have attach & detach methods, but there's only one
implementation.  Although there are some differences in its behaviour for
different DRC types, the overall structure is the same, so while we might
want different method implementations for some parts, we're unlikely to
want them for the top-level functions.

So, replace them with direct function calls.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-08 14:38:26 +10:00
David Gibson
cd74d27e42 spapr: Clean up handling of DR-indicator
There are 3 types of "indicator" associated with hotplug in the PAPR spec:
the "allocation state", "isolation state" and "DR-indicator".  The first
two are intimately tied to the various state transitions associated with
hotplug.  The DR-indicator, however, is different and simpler.

It's basically just a guest controlled variable which can be used by the
guest to flag state or problems associated with a device.  The idea is that
the hypervisor can use it to present information back on management
consoles (on some machines with PowerVM it may even control physical LEDs
on the machine case associated with the relevant device).

For that reason, there's only ever likely to be a single update
implementation so the set_indicator_state method isn't useful.  Replace it
with a direct function call.

While we're there, make some small associated cleanups:
  * PAPR doesn't use the term "indicator state", just "DR-indicator" and
the allocation state and isolation state are also considered "indicators".
Rename things to be less confusing
  * Fold set_indicator_state() and rtas_set_indicator_state() into a single
rtas_set_dr_indicator() function.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-08 14:38:26 +10:00
David Gibson
7b7258f810 spapr: Clean up RTAS set-indicator
In theory the RTAS set-indicator call can be used for a number of
"indicators" defined by PAPR.  In practice the only ones we're ever likely
to implement are those used for Dynamic Reconfiguration (i.e. hotplug).
Because of this, the current implementation determines the associated DRC
object, before dispatching based on the type of indicator.

However, this means we also need a check that we're dealing with a DR
related indicator at all, which duplicates some of the logic from the
switch further down.

Even though it means a bit of code duplication, things work out cleaner if
we delegate the DRC lookup to the individual indicator type functions -
and it also allows some further cleanups.

While we're there, remove references to "sensor", a copy/paste artefact
from the related, but distinct "get-sensor" call.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-08 14:38:26 +10:00
David Gibson
454b580ae9 spapr: Don't misuse DR-indicator in spapr_recover_pending_dimm_state()
With some combinations of migration and hotplug we can lose temporary state
indicating how many DRCs (guest side hotplug handles) are still connected
to a DIMM object in the process of removal.  When we hit that situation
spapr_recover_pending_dimm_state() is used to scan more extensively and
work out the right number.

It does this using drc->indicator state to determine what state of
disconnection the DRC is in.  However, this is not safe, because the
indicator state is guest settable - in fact it's more-or-less a purely
guest->host notification mechanism which should have no bearing on the
internals of hotplug state management.

So, replace the test for this with a test on drc->dev, which is a purely
qemu side managed variable, and updated in the same BQL critical section as
the indicator state.

This does introduce an off-by-one change, because the indicator state was
updated before the call to spapr_lmb_release() on the current DRC, whereas
drc->dev is updated afterwards.  That's corrected by always decrementing
the nr_lmbs value instead of only doing so in the case where we didn't
have to recover information.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-08 14:38:26 +10:00
David Gibson
f224d35be9 spapr: Clean up DR entity sense handling
DRC classes have an entity_sense method to determine (in a specific PAPR
sense) the presence or absence of a device plugged into a DRC.  However,
we only have one implementation of the method, which explicitly tests for
different DRC types.  This changes it to instead have different method
implementations for the two cases: "logical" and "physical" DRCs.

While we're at it, the entity sense method always returns RTAS_OUT_SUCCESS,
and the interesting value is returned via pass-by-reference.  Simplify this
to directly return the value we care about.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-08 14:38:26 +10:00
David Gibson
2c5534776b pseries: Correct panic behaviour for pseries machine type
The pseries machine type doesn't usually use the 'pvpanic' device as such,
because it has a firmware/hypervisor facility with roughly the same
purpose.  The 'ibm,os-term' RTAS call notifies the hypervisor that the
guest has crashed.

Our implementation of this call was sending a GUEST_PANICKED qmp event;
however, it was not doing the other usual panic actions, making its
behaviour different from pvpanic for no good reason.

To correct this, we should call qemu_system_guest_panicked() rather than
directly sending the panic event.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2017-06-08 14:38:18 +10:00
Greg Kurz
8a9e0e7b89 spapr: fix memory leak in spapr_memory_pre_plug()
The string returned by object_property_get_str() is dynamically allocated.

(Spotted by Coverity, CID 1375942)

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-08 11:05:31 +10:00
Greg Kurz
2d3e302ec2 target/ppc: fix memory leak in kvmppc_is_mem_backend_page_size_ok()
The string returned by object_property_get_str() is dynamically allocated.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-08 11:05:31 +10:00
Greg Kurz
ec69355bef target/ppc: pass const string to kvmppc_is_mem_backend_page_size_ok()
This function has three implementations. Two are stubs that do nothing
and the third one only passes the obj_path argument to:

Object *object_resolve_path(const char *path, bool *ambiguous);

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-08 11:05:31 +10:00
Peter Maydell
bbfa326fc8 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* virtio-scsi use-after-free fix (Fam)
* SMM fixes and improvements for TCG (myself, Mihail)
* irqchip and AddressSpaceDispatch cleanups and fixes (Peter)
* Coverity fix (Stefano)
* NBD cleanups and fixes (Vladimir, Eric, myself)
* RTC accuracy improvements and code cleanups (Guangrong+Yunfang)
* socket error reporting improvement (Daniel)
* GDB XML description for SSE registers (Abdallah)
* kvmclock update fix (Denis)
* SMM memory savings (Gonglei)
* -cpu 486 fix (myself)
* various bugfixes (Roman, Peter, myself, Thomas)
* rtc-test improvement (Guangrong)
* migration throttling fix (Felipe)
* create docs/ subdirectories (myself)

# gpg: Signature made Wed 07 Jun 2017 17:22:07 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (31 commits)
  docs: create config/, devel/ and spin/ subdirectories
  cpus: reset throttle_thread_scheduled after sleep
  kvm: don't register smram_listener when smm is off
  nbd: make it thread-safe, fix qcow2 over nbd
  target/i386: Add GDB XML description for SSE registers
  i386/kvm: do not zero out segment flags if segment is unusable or not present
  edu: fix memory leak on msi_broken platforms
  linuxboot_dma: compile for i486
  kvmclock: update system_time_msr address forcibly
  nbd: Fully initialize client in case of failed negotiation
  sockets: improve error reporting if UNIX socket path is too long
  i386: fix read/write cr with icount option
  target/i386: use multiple CPU AddressSpaces
  target/i386: enable A20 automatically in system management mode
  virtio-scsi: Unset hotplug handler when unrealize
  exec: simplify phys_page_find() params
  nbd/client.c: use errp instead of LOG
  nbd: add errp to read_sync, write_sync and drop_sync
  nbd: add errp parameter to nbd_wr_syncv()
  nbd: read_sync and friends: return 0 on success
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-07 18:24:08 +01:00
Paolo Bonzini
ac06724a71 docs: create config/, devel/ and spin/ subdirectories
Developer documentation should be its own manual.  As a start, move all
developer-oriented files to a separate directory.

Also move non-text files to their own directories: docs/config/ for
QEMU -readconfig input, and docs/spin/ for formal models to be used
with the SPIN model checker.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:03 +02:00
Felipe Franciosi
90bb0c0421 cpus: reset throttle_thread_scheduled after sleep
Currently, the throttle_thread_scheduled flag is reset back to 0 before
sleeping (as part of the throttling logic). Given that throttle_timer
(well, any timer) may tick with a slight delay, it so happens that under
heavy throttling (ie. close or on CPU_THROTTLE_PCT_MAX) the tick may
schedule a further cpu_throttle_thread() work item after the flag reset,
but before the previous sleep completed. This results in the vCPU thread
sleeping continuously for potentially several seconds in a row.

The chances of that happening can be drastically minimised by resetting
the flag after the sleep.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Signed-off-by: Malcolm Crossley <malcolm@nutanix.com>
Message-Id: <1495229390-18909-1-git-send-email-felipe@nutanix.com>
Acked-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:03 +02:00
Gonglei
d870cfdea5 kvm: don't register smram_listener when smm is off
If the user disables SMM with '-machine smm=off', we
should not register smram_listener, so that we can
avoid wasting memory in KVM on the added second
address space.

Meanwhile we should assign value of the global kvm_state
before invoking the kvm_arch_init(), because
pc_machine_is_smm_enabled() may use it by kvm_has_smm().

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Message-Id: <1496316915-121196-1-git-send-email-arei.gonglei@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Paolo Bonzini
6bdcc018a6 nbd: make it thread-safe, fix qcow2 over nbd
NBD is not thread safe, because it accesses s->in_flight without
a CoMutex.  Fixing this will be required for multiqueue.
CoQueue doesn't have spurious wakeups but, when another coroutine can
run between qemu_co_queue_next's wakeup and qemu_co_queue_wait's
re-locking of the mutex, the wait condition can become false and
a loop is necessary.

In fact, it turns out that the loop is necessary even without this
multi-threaded scenario.  A particular sequence of coroutine wakeups
is happening ~80% of the time when starting a guest with qcow2 image
served over NBD (i.e. qemu-nbd --format=raw, and QEMU's -drive option
has -format=qcow2).  This patch fixes that issue too.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Abdallah Bouassida
b8158192fa target/i386: Add GDB XML description for SSE registers
Add an XML description for SSE registers (XMM+MXCSR) for both X86
and X86-64 architectures in the GDB stub:
- configure: Define gdb_xml_files for the X86 targets (32 and 64bit).
- gdb-xml/i386-32bit-sse.xml & gdb-xml/i386-64bit-sse.xml: The XML files
that contain a description of the XMM + MXCSR registers.
- gdb-xml/i386-32bit.xml & gdb-xml/i386-64bit.xml: wrappers that include
the XML file of the core registers and the other XML file of the SSE registers.
- target/i386/cpu.c: Modify the gdb_core_xml_file to the new XML wrapper,
  modify the gdb_num_core_regs to fit the registers number defined in each
  XML file.

Signed-off-by: Abdallah Bouassida <abdallah.bouassida@lauterbach.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Roman Pen
d45fc087c2 i386/kvm: do not zero out segment flags if segment is unusable or not present
This is a fix for the problem [1], where VMCB.CPL was set to 0 and interrupt
was taken on userspace stack.  The root cause lies in the specific AMD CPU
behaviour which manifests itself as unusable segment attributes on SYSRET[2].

Here in this patch flags are not touched even if the segment is unusable or is not
present, therefore CPL (which is stored in DPL field) should not be lost and
will be successfully restored on kvm/svm kernel side.

Also current patch should not break desired behavior described in this commit:

4cae9c9796 ("target-i386: kvm: clear unusable segments' flags in migration")

since present bit will be dropped if segment is unusable or is not present.

This is the second part of the whole fix of the corresponding problem [1],
first part is related to kvm/svm kernel side and does exactly the same:
segment attributes are not zeroed out.

[1] Message id: CAJrWOzD6Xq==b-zYCDdFLgSRMPM-NkNuTSDFEtX=7MreT45i7Q@mail.gmail.com
[2] Message id: 5d120f358612d73fc909f5bfa47e7bd082db0af0.1429841474.git.luto@kernel.org

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Signed-off-by: Mikhail Sennikovskii <mikhail.sennikovskii@profitbricks.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Michael Chapman <mike@very.puzzling.org>
Cc: qemu-devel@nongnu.org
Message-Id: <20170601085604.12980-1-roman.penyaev@profitbricks.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Paolo Bonzini
c25a67f0c3 edu: fix memory leak on msi_broken platforms
If msi_init fails, the thread has already been created and the
mutex/condvar are not destroyed.  Initialize everything only
after the point where pci_edu_realize cannot fail.

Reported-by: Markus Armbruster <armbru@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Paolo Bonzini
7e01838510 linuxboot_dma: compile for i486
The ROM uses the cmovne instruction, which is new in Pentium Pro and does not
work when running QEMU with "-cpu 486".  Avoid producing that instruction.

Suggested-by: Richard W.M. Jones <rjones@redhat.com>
Suggested-by: Thomas Huth <thuth@redhat.com>
Reported-by: Rob Landley <rob@landley.net>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Denis Plotnikov
e2b6c1712e kvmclock: update system_time_msr address forcibly
Do an update of system_time_msr address every time before reading
the value of tsc_timestamp from guest's kvmclock page.

There are no other code paths which ensure that qemu has an up-to-date
value of system_time_msr. So, force this update on guest's tsc_timestamp
reading.

This bug affects those nested setups which turn off TPR access
interception for L2 guests, so that the access intercepted by L0 doesn't
show up in L1.
Linux bootstrap initializes kvmclock before the APIC initialization that causes the TPR access.
That's why on L1 guests, having TPR interception turned on for L2, the effect
of the bug is not revealed.

This patch fixes this problem by making sure it knows the correct
system_time_msr address every time it is needed.

Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Message-Id: <1496054944-25623-1-git-send-email-dplotnikov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Eric Blake
df8ad9f128 nbd: Fully initialize client in case of failed negotiation
If a non-NBD client connects to qemu-nbd, we would end up with
a SIGSEGV in nbd_client_put() because we were trying to
unregister the client's association to the export, even though
we skipped inserting the client into that list.  Easy trigger
in two terminals:

$ qemu-nbd -p 30001 --format=raw file
$ nmap 127.0.0.1 -p 30001

nmap claims that it thinks it connected to a pago-services1
server (which probably means nmap could be updated to learn the
NBD protocol and give a more accurate diagnosis of the open
port - but that's not our problem), then terminates immediately,
so our call to nbd_negotiate() fails.  The fix is to reorder
nbd_co_client_start() to ensure that all initialization occurs
before we ever try talking to a client in nbd_negotiate(), so
that the teardown sequence on negotiation failure doesn't fault
while dereferencing a half-initialized object.

While debugging this, I also noticed that nbd_update_server_watch()
called by nbd_client_closed() was still adding a channel to accept
the next client, even when the state was no longer RUNNING.  That
is fixed by making nbd_can_accept() pay attention to the current
state.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1451614

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170527030421.28366-1-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Daniel P. Berrange
ad9579aaa1 sockets: improve error reporting if UNIX socket path is too long
The 'struct sockaddr_un' only allows 108 bytes for the socket
path.

If the user supplies a path, QEMU uses snprintf() to silently
truncate it when too long. This is undesirable because the user
will then be unable to connect to the path they asked for.

If the user doesn't supply a path, QEMU builds one based on
TMPDIR, but if that leads to an overlong path, it mistakenly
uses error_setg_errno() with a stale errno value, because
snprintf() does not set errno on truncation.

In solving this the code needed some refactoring to ensure we
don't pass 'un.sun_path' directly to any APIs which expect
NUL-terminated strings, because the path is not required to
be terminated.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <20170525155300.22743-1-berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Mihail Abakumov
5b003a40bb i386: fix read/write cr with icount option
Running Windows with icount causes a crash in the instruction that writes cr.
This patch fixes it.

Reading and writing cr cause an icount read because the
cpu_get_apic_tpr and cpu_set_apic_tpr functions are called. So
gen_io_start()/gen_io_end() calls are needed.

Signed-off-by: Mihail Abakumov <mikhail.abakumov@ispras.ru>
Message-Id: <ffb376034ff184f2fcbe93d5317d9e76@ispras.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Paolo Bonzini
f8c45c6550 target/i386: use multiple CPU AddressSpaces
This speeds up SMM switches.  Later on it may remove the need to take
the BQL, and it may also allow reusing code between TCG and KVM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Paolo Bonzini
c8bc83a4dd target/i386: enable A20 automatically in system management mode
Ignore env->a20_mask when running in system management mode.

Reported-by: Anthony Xu <anthony.xu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1494502528-12670-1-git-send-email-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-07 18:22:02 +02:00
Peter Maydell
64175afc69 arm_gicv3: Fix ICC_BPR1 reset value when EL3 not implemented
If EL3 is not implemented (ie only one security state) then the
one and only ICC_BPR1 register behaves like the Non-secure
ICC_BPR1 in an EL3-present configuration. In particular, its
reset value is GIC_MIN_BPR_NS, not GIC_MIN_BPR.

Correct the erroneous reset value; this fixes a problem where
we might hit the assert added in commit a89ff39ee9.

Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1496849369-30282-1-git-send-email-peter.maydell@linaro.org
2017-06-07 17:21:44 +01:00
Bruno Dominguez
11cde1c810 configure: split c and cxx extra flags
There was no possibility to add specific cxx flags using the configure
file. So a new entry has been created to support it.

Duplication of information in configure and rules.mak. Taking
QEMU_CFLAGS and add them to QEMU_CXXFLAGS, now the value of
QEMU_CXXFLAGS is stored in config-host.mak, so there is no need for
it.

The makefile for libvixl was adding flags for QEMU_CXXFLAGS in
QEMU_CFLAGS because of the addition in rules.mak. That was removed, so
adding them where it should be.

Signed-off-by: Bruno Dominguez <bru.dominguez@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1496754467-20893-1-git-send-email-bru.dominguez@gmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-07 15:29:46 +01:00
Peter Maydell
b55a69fe5f Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170607' into staging
migration/next for 20170607

# gpg: Signature made Wed 07 Jun 2017 10:02:01 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170607:
  qemu/migration: fix the double free problem on from_src_file
  ram: Make RAMState dynamic
  ram: Use MigrationStats for statistics
  ram: Move ZERO_TARGET_PAGE inside XBZRLE
  ram: Call migration_page_queue_free() at ram_migration_cleanup()
  ram: We only print throttling information sometimes
  ram: Unfold get_xbzrle_cache_stats() into populate_ram_info()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-07 15:06:42 +01:00
Roman Pen
528f449f59 coroutine-lock: do not touch coroutine after another one has been entered
Submission of requests on linux aio is a bit tricky and can lead to
requests completions on submission path:

44713c9e85 ("linux-aio: Handle io_submit() failure gracefully")
0ed93d84ed ("linux-aio: process completions from ioq_submit()")

That means that any coroutine which has been yielded in order to wait
for completion can be resumed from submission path and be eventually
terminated (freed).

The following use-after-free crash was observed when IO throttling
was enabled:

 Program received signal SIGSEGV, Segmentation fault.
 [Switching to Thread 0x7f5813dff700 (LWP 56417)]
 virtqueue_unmap_sg (elem=0x7f5804009a30, len=1, vq=<optimized out>) at virtio.c:252
 (gdb) bt
 #0  virtqueue_unmap_sg (elem=0x7f5804009a30, len=1, vq=<optimized out>) at virtio.c:252
                              ^^^^^^^^^^^^^^
                              remember the address

 #1  virtqueue_fill (vq=0x5598b20d21b0, elem=0x7f5804009a30, len=1, idx=0) at virtio.c:282
 #2  virtqueue_push (vq=0x5598b20d21b0, elem=elem@entry=0x7f5804009a30, len=<optimized out>) at virtio.c:308
 #3  virtio_blk_req_complete (req=req@entry=0x7f5804009a30, status=status@entry=0 '\000') at virtio-blk.c:61
 #4  virtio_blk_rw_complete (opaque=<optimized out>, ret=0) at virtio-blk.c:126
 #5  blk_aio_complete (acb=0x7f58040068d0) at block-backend.c:923
 #6  coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at coroutine-ucontext.c:78

 (gdb) p * elem
 $8 = {index = 77, out_num = 2, in_num = 1,
       in_addr = 0x7f5804009ad8, out_addr = 0x7f5804009ae0,
       in_sg = 0x0, out_sg = 0x7f5804009a50}
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       'in_sg' and 'out_sg' are invalid.
       e.g. it is impossible that 'in_sg' is zero,
       instead its value must be equal to:

       (gdb) p/x 0x7f5804009ad8 + sizeof(elem->in_addr[0]) + 2 * sizeof(elem->out_addr[0])
       $26 = 0x7f5804009af0

Seems 'elem' was corrupted.  Meanwhile another thread raised an abort:

 Thread 12 (Thread 0x7f57f2ffd700 (LWP 56426)):
 #0  raise () from /lib/x86_64-linux-gnu/libc.so.6
 #1  abort () from /lib/x86_64-linux-gnu/libc.so.6
 #2  qemu_coroutine_enter (co=0x7f5804009af0) at qemu-coroutine.c:113
 #3  qemu_co_queue_run_restart (co=0x7f5804009a30) at qemu-coroutine-lock.c:60
 #4  qemu_coroutine_enter (co=0x7f5804009a30) at qemu-coroutine.c:119
                           ^^^^^^^^^^^^^^^^^^
                           WTF?? this is equal to elem from crashed thread

 #5  qemu_co_queue_run_restart (co=0x7f57e7f16ae0) at qemu-coroutine-lock.c:60
 #6  qemu_coroutine_enter (co=0x7f57e7f16ae0) at qemu-coroutine.c:119
 #7  qemu_co_queue_run_restart (co=0x7f5807e112a0) at qemu-coroutine-lock.c:60
 #8  qemu_coroutine_enter (co=0x7f5807e112a0) at qemu-coroutine.c:119
 #9  qemu_co_queue_run_restart (co=0x7f5807f17820) at qemu-coroutine-lock.c:60
 #10 qemu_coroutine_enter (co=0x7f5807f17820) at qemu-coroutine.c:119
 #11 qemu_co_queue_run_restart (co=0x7f57e7f18e10) at qemu-coroutine-lock.c:60
 #12 qemu_coroutine_enter (co=0x7f57e7f18e10) at qemu-coroutine.c:119
 #13 qemu_co_enter_next (queue=queue@entry=0x5598b1e742d0) at qemu-coroutine-lock.c:106
 #14 timer_cb (blk=0x5598b1e74280, is_write=<optimized out>) at throttle-groups.c:419

Crash can be explained by access of 'co' object from the loop inside
qemu_co_queue_run_restart():

  while ((next = QSIMPLEQ_FIRST(&co->co_queue_wakeup))) {
      QSIMPLEQ_REMOVE_HEAD(&co->co_queue_wakeup, co_queue_next);
                           ^^^^^^^^^^^^^^^^^^^^
                           on each iteration 'co' is accessed,
                           but 'co' can be already freed

      qemu_coroutine_enter(next);
  }

When 'next' coroutine is resumed (entered) it can in its turn resume
'co', and eventually free it.  That's why we see 'co' (which was freed)
has the same address as 'elem' from the first backtrace.

The fix is obvious: use temporary queue and do not touch coroutine after
first qemu_coroutine_enter() is invoked.

The issue is quite rare and happens every ~12 hours on very high IO
and CPU load (building linux kernel with -j512 inside guest) when IO
throttling is enabled.  With the fix applied guest is running ~35 hours
and is still alive so far.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170601160847.23720-1-roman.penyaev@profitbricks.com
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Fam Zheng <famz@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: qemu-devel@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-07 14:39:00 +01:00
Stefan Hajnoczi
3a586d2f0b .gdbinit: load QEMU sub-commands when gdb starts
The scripts/qemu-gdb.py file is not easily discoverable.  Add a .gdbinit
file so GDB either loads qemu-gdb.py automatically or prints a message
informing the user how to enable them (some systems disable ./.gdbinit
loading for security reasons).

Symlink .gdbinit and the scripts directory in order to make out-of-tree
builds work.  The scripts directory is used to find the qemu-gdb.py file
specified by a relative path in .gdbinit.

Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Tested-by: Eric Blake <eblake@redhat.com>
Message-id: 20170517124042.1430-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-07 14:38:45 +01:00
Philippe Mathieu-Daudé
f652402487 coccinelle: fix typo in comment
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-07 14:38:44 +01:00
Philippe Mathieu-Daudé
462e5d5065 oslib: strip trailing '\n' from error_setg() string argument
spotted by Coccinelle script scripts/coccinelle/err-bad-newline.cocci

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-07 14:38:44 +01:00
Jose Ricardo Ziviani
249e9f792c simpletrace: Improve the error message if event is not declared
Today, if we use a trace-event file which does not declare an event
existing in the log file we'll get the following error:

$ scripts/simpletrace.py trace-events trace-68508
Traceback (most recent call last):
  File "scripts/simpletrace.py", line 242, in <module>
    run(Formatter())
  File "scripts/simpletrace.py", line 217, in run
    process(events, sys.argv[2], analyzer, read_header=read_header)
  File "scripts/simpletrace.py", line 192, in process
    for rec in read_trace_records(edict, log):
  File "scripts/simpletrace.py", line 107, in read_trace_records
    rec = read_record(edict, idtoname, fobj)
  File "scripts/simpletrace.py", line 71, in read_record
    return get_record(edict, idtoname, rechdr, fobj)
  File "scripts/simpletrace.py", line 45, in get_record
    event = edict[name]
KeyError: 'qemu_mutex_locked'

This patch improves this error by adding a hint instead of just that
KeyError log:

$ scripts/simpletrace.py trace-events trace-68508
'qemu_mutex_locked' event is logged but is not declared in the trace
events file, try using trace-events-all instead.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1496075404-8845-1-git-send-email-joserz@linux.vnet.ibm.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-06-07 14:34:19 +01:00
Peter Maydell
0db1851bec Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.10-pull-request' into staging
# gpg: Signature made Wed 07 Jun 2017 10:29:50 BST
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier/tags/m68k-for-2.10-pull-request:
  target/m68k: implement rtd

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-07 11:56:00 +01:00
Peter Maydell
b187e2b530 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Wed 07 Jun 2017 04:29:20 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  Revert "Change net/socket.c to use socket_*() functions" again
  net/rocker: Cleanup the useless return value check

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-07 11:16:22 +01:00
Laurent Vivier
18059c9e16 target/m68k: implement rtd
Add "Return and Deallocate" (rtd) instruction.

  RTD #d

    (SP) -> PC
    SP + 4 + d -> SP

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Tested-By: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Message-Id: <20170605100014.22981-1-laurent@vivier.eu>
2017-06-07 11:18:30 +02:00
Peter Maydell
8b3e9ca74c Merge remote-tracking branch 'remotes/rth/tags/pull-s390-20170606' into staging
Queued s390 patches

# gpg: Signature made Wed 07 Jun 2017 01:18:29 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-s390-20170606: (70 commits)
  target/s390x: addressing exceptions are suppressing
  target/s390x: mark ETF2 and ETF2-ENH facilities as available
  target/s390x: check alignment in CDSG in the !CONFIG_ATOMIC128 case
  target/s390x: implement STORE PAIR TO QUADWORD
  target/s390x: implement LOAD PAIR FROM QUADWORD
  target/s390x: implement TRANSLATE ONE/TWO TO ONE/TWO
  target/s390x: implement TEST DECIMAL
  target/s390x: implement UNPACK UNICODE
  target/s390x: implement UNPACK ASCII
  target/s390x: implement PACK UNICODE
  target/s390x: implement PACK ASCII
  target/s390x: implement MOVE LONG UNICODE
  target/s390x: implement COMPARE LOGICAL LONG UNICODE
  target/s390x: improve MOVE LONG and MOVE LONG EXTENDED
  target/s390x: fix adj_len_to_page
  target/s390x: implement COMPARE LOGICAL LONG
  target/s390x: fix COMPARE LOGICAL LONG EXTENDED
  target/s390x: improve 24-bit and 31-bit lengths read/write
  target/s390x: improve 24-bit and 31-bit addresses write
  target/s390x: improve 24-bit and 31-bit addresses read
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-07 10:14:54 +01:00
QingFeng Hao
eefff991d0 qemu/migration: fix the double free problem on from_src_file
In load_snapshot, mis->from_src_file is freed twice, the first free is by
qemu_fclose, the second is by migration_incoming_state_destroy and
it causes Illegal instruction exception. The fix is just to remove the
first free.

This problem is found by qemu-iotests case 068 since commit
"660819b migration: shut src return path unconditionally". The error is:
068 1s ... - output mismatch (see 068.out.bad)
    --- tests/qemu-iotests/068.out	2017-05-06 01:00:26.417270437 +0200
    +++ 068.out.bad	2017-06-03 13:59:55.360274640 +0200
    @@ -6,6 +6,8 @@
     QEMU X.Y.Z monitor - type 'help' for more information
     (qemu) savevm 0
     (qemu) quit
    +./common.config: line 107: 242472 Illegal instruction     (core dumped) ( if [ -n "${QEMU_NEED_PID}" ]; then
    +    echo $BASHPID > "${QEMU_TEST_DIR}/qemu-${_QEMU_HANDLE}.pid";
    +fi; exec "$QEMU_PROG" $QEMU_OPTIONS "$@" )
     QEMU X.Y.Z monitor - type 'help' for more information
    -(qemu) quit
    -*** done
    +(qemu) *** done

Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-07 10:20:56 +02:00
Juan Quintela
53518d9448 ram: Make RAMState dynamic
We create the variable while we are at migration and we remove it
after migration.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-07 10:20:55 +02:00
Juan Quintela
9360447d34 ram: Use MigrationStats for statistics
RAM Statistics need to survive migration to make info migrate work, so we
need to store them outside of RAMState.  As we already have structs
with those fields, just use them (MigrationStats and XBZRLECacheStats).

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-06-07 10:20:54 +02:00
Juan Quintela
c00e092832 ram: Move ZERO_TARGET_PAGE inside XBZRLE
It was only used by XBZRLE anyways.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-06-07 10:20:54 +02:00
Juan Quintela
83c13382e4 ram: Call migration_page_queue_free() at ram_migration_cleanup()
We shouldn't be using memory later than that.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-06-07 10:20:53 +02:00
Juan Quintela
338182c83c ram: We only print throttling information sometimes
Change it to be consistent with everything else.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-06-07 10:20:52 +02:00
Juan Quintela
114f5aee02 ram: Unfold get_xbzrle_cache_stats() into populate_ram_info()
They were called consecutively always.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-06-07 10:20:52 +02:00
Daniel P. Berrange
6701e5514b Revert "Change net/socket.c to use socket_*() functions" again
This reverts commit 883e4f7624.

This code changed net/socket.c from using socket()+connect(),
to using socket_connect(). In theory this is great, but in
practice this has completely broken the ability to connect
the frontend and backend:

  $ ./x86_64-softmmu/qemu-system-x86_64 \
       -device e1000,id=e0,netdev=hn0,mac=DE:AD:BE:EF:AF:05 \
       -netdev socket,id=hn0,connect=localhost:1234
  qemu-system-x86_64: -device e1000,id=e0,netdev=hn0,mac=DE:AD:BE:EF:AF:05: Property 'e1000.netdev' can't find value 'hn0'

The old code would call net_socket_fd_init() synchronously,
while letting the connect() complete in the background. The
new code moved net_socket_fd_init() so that it is only called
after connect() completes in the background.

Thus at the time we initialize the NIC frontend, the backend
does not exist.

The socket_connect() conversion as done is a bad fit for the
current code, since it did not try to change the way it deals
with async connection completion. Rather than try to fix this,
just revert the socket_connect() conversion entirely.

The code is about to be converted to use QIOChannel which
will let the problem be solved in a cleaner manner. This
revert is more suitable for stable branches in the meantime.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-06-07 10:58:31 +08:00
Mao Zhongyi
4cee3cf35c net/rocker: Cleanup the useless return value check
None of pci_dma_read()'s callers check the return value except
rocker. There is no need to check it because it always returns
0. So the check is useless. Remove it entirely.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-06-07 10:58:31 +08:00
David Hildenbrand
49921d6886 target/s390x: addressing exceptions are suppressing
We have to make the address in the old PSW point at the next
instruction, as addressing exceptions are suppressing and not
nullifying.

I assume that there are a lot of other broken cases (as most instructions
we care about are suppressing) - all trigger_pgm_exception() specifying
an explicit number or ILEN_LATER look suspicious, however this is another
story that might require bigger changes (and I have to understand when
the address might already have been incremented first).

This is needed to make an upcoming kvm-unit-test work.

Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170529121228.2789-1-david@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:25:14 -07:00
Aurelien Jarno
3190dfc5e1 target/s390x: mark ETF2 and ETF2-ENH facilities as available
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-30-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:25:14 -07:00
Aurelien Jarno
c0080f1bdb target/s390x: check alignment in CDSG in the !CONFIG_ATOMIC128 case
The CDSG instruction requires a 16-byte alignment, as expressed in
the MO_ALIGN_16 passed to helper_atomic_cmpxchgo_be_mmu. In the non
parallel case, use check_alignment to enforce this.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170604202034.16615-4-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:25:14 -07:00
Aurelien Jarno
c21b610f58 target/s390x: implement STORE PAIR TO QUADWORD
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170604202034.16615-3-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:25:14 -07:00
Aurelien Jarno
e22dfdb28d target/s390x: implement LOAD PAIR FROM QUADWORD
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170604202034.16615-2-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:25:14 -07:00
Aurelien Jarno
4065ae7634 target/s390x: implement TRANSLATE ONE/TWO TO ONE/TWO
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-29-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:44 -07:00
Aurelien Jarno
5d4a655a41 target/s390x: implement TEST DECIMAL
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-28-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:44 -07:00
Aurelien Jarno
1541778721 target/s390x: implement UNPACK UNICODE
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-27-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
1a35f08a22 target/s390x: implement UNPACK ASCII
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-26-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
4e256bef65 target/s390x: implement PACK UNICODE
Use a common helper with PACK ASCII as the differences are limited to
the stride of the source operand.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-25-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
3bd3d6d302 target/s390x: implement PACK ASCII
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-24-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
16f2e4b841 target/s390x: implement MOVE LONG UNICODE
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-23-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
31006af3bb target/s390x: implement COMPARE LOGICAL LONG UNICODE
For that we need to make program_interrupt available to qemu-user.
Fortunately there is almost nothing to change as both kvm_enabled and
CONFIG_KVM evaluate to false in that case.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-22-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
d332712134 target/s390x: improve MOVE LONG and MOVE LONG EXTENDED
As MVCL and MVCLE only differ by their operands, use a common
do_mvcl helper. Optimize it calling fast_memmove and fast_memset.
Correctly write back addresses. Check that r1 and r2/r3 registers
are even.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-21-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
22f04c3198 target/s390x: fix adj_len_to_page
adj_len_to_page doesn't return the correct result when the address
is already page aligned and the length is bigger than a page. Fix that.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-20-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
5c2b48a8f0 target/s390x: implement COMPARE LOGICAL LONG
As CLCL and CLCLE mostly differ by their operands, use a common do_clcl
helper. Another difference is that CLCL is not interruptible.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-19-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
84aa07f109 target/s390x: fix COMPARE LOGICAL LONG EXTENDED
There are multiple issues with the COMPARE LOGICAL LONG EXTENDED
instruction:
- The test between the two operands is inverted, leading to an inversion
  of the cc values 1 and 2.
- The address and length of an operand continue to be decreased after
  reaching the end of this operand. These values are then wrongly written
  back to the registers.
- We should limit the amount of bytes to process, so that interrupts can
  be served correctly.

At the same time rename dest into src1 and src into src3 to match the
operand names and make the code less confusing.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-18-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
29a58fd85f target/s390x: improve 24-bit and 31-bit lengths read/write
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-17-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
a65047afe5 target/s390x: improve 24-bit and 31-bit addresses write
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-16-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:43 -07:00
Aurelien Jarno
a5c3cedd73 target/s390x: improve 24-bit and 31-bit addresses read
Improve fix_address to also handle the 24-bit mode. Rename fix_address
to wrap_address to better explain what is changed.

Replace the calls to get_address with x2 = 0 and b2 = 0 by
call to wrap_address, leading to the removal of this function. Rename
get_address_31fix into get_address.

Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-15-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:42 -07:00
Aurelien Jarno
01f8db8857 target/s390x: implement MOVE ZONES
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-14-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:42 -07:00
Aurelien Jarno
fdc0a7474a target/s390x: implement MOVE WITH OFFSET
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-13-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:42 -07:00
Aurelien Jarno
256dab6fe8 target/s390x: implement MOVE NUMERICS
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-12-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:42 -07:00
Aurelien Jarno
6c9deca8a1 target/s390x: implement MOVE INVERSE
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-11-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:42 -07:00
Aurelien Jarno
9c8be59836 target/s390x: implement COMPARE AND SIGNAL
These functions differ from COMPARE by generating an exception for a
QNaN input. Use the non quiet version of floatXX_compare.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-10-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 15:20:38 -07:00
Aurelien Jarno
76c574906e target/s390x: implement PACK
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-7-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Aurelien Jarno
0c0974d785 target/s390x: implement TEST ADDRESSING MODE
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-6-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Aurelien Jarno
6699adfc18 target/s390x: implement TEST AND SET
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-5-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Aurelien Jarno
1f58720c5f target/s390x: implement local-TLB-clearing in IPTE
And at the same time make IPTE SMP aware.

Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-4-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Aurelien Jarno
8a4719f527 target/s390x: remove some Linux assumptions from IPTE
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-3-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Aurelien Jarno
51a718bf3d target/s390x: remove dead code in translate.c
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-2-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Thomas Huth
fc7fbcbc48 target/s390x/cpu_models: Allow some additional feature bits for the "qemu" CPU
Currently we only present the plain z900 feature bits to the guest,
but QEMU already emulates some additional features (but not all of
the next CPU generation, so we can not use the next CPU level as
default yet). Since newer Linux kernels are checking the feature bits
and refuse to work if a required feature is missing, it would be nice
to have a way to present more of the supported features when we are
running with the "qemu" CPU.
This patch now adds the supported features to the "full_feat" bitmap,
so that additional features can be enabled on the command line now,
for example with:

 qemu-system-s390x -cpu qemu,stfle=true,ldisp=true,eimm=true,stckf=true

Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1495704132-5675-1-git-send-email-thuth@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Richard Henderson
d376f123c7 target/s390x: Re-implement a few EXECUTE target insns directly
While the previous patch is required for proper conformance,
the vast majority of target insns are MVC and XC for implementing
memmove and memset respectively.  The next most common are CLC,
TR, and SVC.

Implementing these (and a few others for which we already have
an implementation) directly is faster than going through full
translation to a TB.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Richard Henderson
303c681a8f target/s390x: Implement EXECUTE via new TranslationBlock
Previously, helper_ex would construct the insn and then implement
the insn via direct calls to other helpers.  This was sufficient to
boot Linux but that is all.

It is easy enough to go the whole nine yards by stashing state for
EXECUTE within the cpu, and then rely on a new TB to be created
that properly and completely interprets the insn.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Richard Henderson
06fc03486c target/s390x: End the TB after EXECUTE
This split will be required for implementing EXECUTE properly.
Do this now as a separate step to aid comparison of before and
after TB listings.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Richard Henderson
99e57856f6 target/s390x: Save current ilen during translation
Use this saved value instead of recomputing from next_pc difference.

Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Richard Henderson
b26de9518d target/s390x: Implement CSPG
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Richard Henderson
31a18b4575 target/s390x: Use atomic operations for COMPARE SWAP PURGE
Also provide the cross-cpu tlb flushing required by the PoO.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:32 -07:00
Richard Henderson
a72da8b7f5 target/s390x: Fix EXECUTE with R1==0
The PoO specifies that when R1==0, no ORing into the insn
loaded from storage takes place.  Load a zero for this case.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
8350079329 target/s390x: Fix some helper_ex problems
(1) The OR of the low bits of R1 into INSN was not being done
consistently; it was forgotten along all but the SVC path.
(2) The setting of ILEN was wrong on SVC path for EXRL.
(3) The data load for ICM read too much.

Fix these by consolidating data load at the beginning, using
get_ilen to control the number of bytes loaded, and ORing in
the byte from R1.  Use extract64 from the full aligned insn
to extract arguments.

Pass in ILEN rather than RET as the more natural way to give
the required data along the SVC path.

Modify ENV->CC_OP directly rather than include it in the
functional interface.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
b90fb26bde target/s390x: Use unwind data for helper_mvcs/mvcp
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
b157fbe6a9 target/s390x: Use unwind data for helper_lra
Fix saving exception_index around mmu_translate; eliminate a dead store.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
1f3ca41665 target/s390x: Use unwind data for helper_tprot
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
aef2b01a50 target/s390x: Use unwind data for helper_testblock
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
75d6240c59 target/s390x: Use unwind data for helper_stctl
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
1b642a732c target/s390x: Use unwind data for helper_lctl
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
97ae2149af target/s390x: Use unwind data for helper_lctlg
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
2c7e5f8c25 target/s390x: Use unwind data for helper_trt
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
d46cd62ff8 target/s390x: Use unwind data for helper_tre
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
981a8ea0c5 target/s390x: Use unwind data for helper_tr
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
84e1b98ba6 target/s390x: Use unwind data for helper_unpk
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
498644e99f target/s390x: Use unwind data for helper_cksm
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
4546137957 target/s390x: Use unwind data for helper_clcle
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
453e4c077d target/s390x: Use unwind data for helper_mvcle
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
7390fb79fd target/s390x: Use unwind data for helper_mvcl
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
44cf6c2e4b target/s390x: Use unwind data for helper_stam
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
9393c020bf target/s390x: Use unwind data for helper_lam
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
08a4cb793f target/s390x: Use unwind data for helper_mvst
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
7cf96fca4c target/s390x: Use unwind data for helper_mvpg
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
3cc8ca3dab target/s390x: Use unwind data for helper_clst
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
4663e82244 target/s390x: Use unwind data for helper_srst
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
868b5cbd91 target/s390x: Use unwind data for helper_clm
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
e79f56f4d6 target/s390x: Use unwind data for helper_clc
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
d3696812e3 target/s390x: Use unwind data for helper_mvc
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
9c009e88e3 target/s390x: Use unwind data for helper_xc
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
6fc2606e58 target/s390x: Use unwind data for helper_oc
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
349d078a26 target/s390x: Use unwind data for helper_nc
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
a5cfc2235b target/s390x: Move helper_ex to end of file
This will avoid needing forward declarations in following patches.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Richard Henderson
23cf9659b4 target/s390x: Use cpu_loop_exit_restore for tlb_fill
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Thomas Huth
f79f1ca4a2 target/s390x: Add support for the TEST BLOCK instruction
TEST BLOCK was likely once used to execute basic memory
tests, but nowadays it's just a (slow) way to clear a page.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1495128400-23759-1-git-send-email-thuth@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-06 14:34:31 -07:00
Fam Zheng
2cbe2de545 virtio-scsi: Unset hotplug handler when unrealize
This matches the qbus_set_hotplug_handler in realize, and it releases
the final reference to the embedded VirtIODevice so that it is
properly finalized.

A use-after-free is fixed with this patch, indirectly:
virtio_device_instance_finalize wasn't called at hot-unplug, and the
vdev->listener would be a dangling pointer in the global and the per
address space listener list. See also RHBZ 1449031.

Cc: qemu-stable@nongnu.org
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170518102808.30046-1-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:36 +02:00
Peter Xu
003a0cf2cd exec: simplify phys_page_find() params
It really only plays with the dispatchers, so the parameter list does
not need that complexity. This helps for readability at least.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1494838260-30439-2-git-send-email-peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:36 +02:00
Vladimir Sementsov-Ogievskiy
be41c100c0 nbd/client.c: use errp instead of LOG
Move to modern errp scheme from just LOGging errors.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170526110913.89098-1-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:36 +02:00
Vladimir Sementsov-Ogievskiy
e44ed99d19 nbd: add errp to read_sync, write_sync and drop_sync
There are a lot of callers of these functions which already have an
errp, which they are filling themselves. On the other hand, nbd_wr_syncv
has an errp parameter too, so it would be great to connect them.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170516094533.6160-5-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:36 +02:00
Vladimir Sementsov-Ogievskiy
f260956536 nbd: add errp parameter to nbd_wr_syncv()
Will be used in following patch to provide actual error message in
some cases.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170516094533.6160-4-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:36 +02:00
Vladimir Sementsov-Ogievskiy
f5d406fe86 nbd: read_sync and friends: return 0 on success
The functions read_sync, drop_sync, write_sync, and also
nbd_negotiate_write, nbd_negotiate_read, nbd_negotiate_drop_sync
return the number of processed bytes. But what can this number be,
except the requested number of bytes?

Actually, the underlying nbd_wr_syncv function returns a value >= 0 and
!= requested_bytes only on EOF during a read operation. So, firstly, it
is impossible on write (let's add an assert) and on read it actually
means that communication is broken (except nbd_receive_reply, see
below).

Most of callers operate like this:
   if (func(..., size) != size) {
       /* error path */
   }
, i.e.:
  1. They are not interested in partial success
  2. Extra duplications in code (especially bad are duplications of
     magic numbers)
  3. User doesn't see actual error message, as return code is lost.
     (this patch doesn't fix this point, but it makes fixing easier)

Several callers handle ret >= 0 and != requested-size separately, by
just returning EINVAL in this case. This patch makes read_sync and
friends return EINVAL in this case, so final behavior is the same.

And only one caller - nbd_receive_reply() does something not so
obvious. It returns EINVAL for ret > 0 and != requested-size, like
previous group, but for ret == 0 it returns 0. The only caller of
nbd_receive_reply() - nbd_read_reply_entry() handles ret == 0 in the
same way as ret < 0, so for now it doesn't matter. However, in
following commits error path handling will be improved and we'll need
to distinguish success from fail in this case too. So, this patch adds
separate helper for this case - read_sync_eof.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170516094533.6160-3-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Vladimir Sementsov-Ogievskiy
f250a42dda nbd: strict nbd_wr_syncv
nbd_wr_syncv is called either from coroutine or from client negotiation
code, when socket is in blocking mode. So, -EAGAIN is impossible.

Furthermore, EAGAIN is confusing, as, what to read/write again? With
EAGAIN as a return code we don't know how much data is already
read or written by the function, so in case of EAGAIN the whole
communication is broken.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170516094533.6160-2-vsementsov@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Stefano Stabellini
7e6478e7d4 Check the return value of fcntl in qemu_set_cloexec
Assert that the return value is not an error. This issue was found by
Coverity.

CID: 1374831

Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
CC: groug@kaod.org
CC: pbonzini@redhat.com
CC: Eric Blake <eblake@redhat.com>
Message-Id: <1494356693-13190-2-git-send-email-sstabellini@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Peter Xu
fd56356422 kvm: irqchip: skip update msi when disabled
It's possible that one device kept its irqfd/virq there even when
MSI/MSIX was disabled globally for that device. One example is
virtio-net-pci (see commit f1d0f15a6 and virtio_pci_vq_vector_mask()).
It is used as a fast path to avoid allocating/releasing irqfd/virq
frequently when the guest enables/disables MSIX.

However, this fast path brought a problem to msi_route_list: the
device MSIRouteEntry is still dangling there even if MSIX is disabled.
Then we cannot know which message to fetch, and even if we could, the
messages would be meaningless. In this case, we can simply ignore this
entry.

It's safe, since when MSIX is enabled again, we'll rebuild them no
matter what.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1448813

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1494309644-18743-4-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Peter Xu
993b1f4b2c msix: trace control bit write op
Meanwhile, abstract a function to detect msix masked bit.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1494309644-18743-3-git-send-email-peterx@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Peter Xu
9ba35d0b86 kvm: irqchip: trace changes on msi add/remove
It'll be nice to know which virq belongs to which device/vector when
adding msi routes, so adding two more parameters for the add trace.

Meanwhile, releasing virq has no tracing before. Add one for it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1494309644-18743-2-git-send-email-peterx@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Xiao Guangrong
bd618eab76 qtest: add rtc periodic timer test
It tests the accuracy of rtc periodic timer which is recently
improved & fixed by commit 7ffcb539a3 ("mc146818rtc: precisely count
the clock for periodic timer", 2017-05-19).

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Message-Id: <20170527025301.23499-1-xiaoguangrong@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Xiao Guangrong
e0c8b950d1 mc146818rtc: embrace all x86 specific code
Introduce a function, rtc_policy_slew_deliver_irq(), which delivers
the irq if LOST_TICK_POLICY_SLEW is used. As that policy is only
supported on x86, calling it on other platforms will trigger an assert.

After that, we can move the x86 specific code to the common place

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Message-Id: <20170510083259.3900-6-xiaoguangrong@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Xiao Guangrong
388ad5d296 mc146818rtc: drop unnecessary '#ifdef TARGET_I386'
If the code purely depends on LOST_TICK_POLICY_SLEW, we can simply
drop '#ifdef TARGET_I386' as only x86 can enable this tick policy

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Message-Id: <20170510083259.3900-5-xiaoguangrong@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Xiao Guangrong
4aa70a0e9c mc146818rtc: ensure LOST_TICK_POLICY_SLEW is only enabled on TARGET_I386
Any tick policy specified on other platforms rather than on TARGET_I386
will fall back to LOST_TICK_POLICY_DISCARD silently; this patch makes
sure only TARGET_I386 can enable LOST_TICK_POLICY_SLEW

After that, we can enable LOST_TICK_POLICY_SLEW in the common code,
which no longer needs '#ifdef TARGET_I386' to make this code x86
specific

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Message-Id: <20170510083259.3900-4-xiaoguangrong@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Tai Yunfang
369b41359a mc146818rtc: precisely count the clock for periodic timer
There are two issues in current code:
1) If the period is changed by re-configuring RegA, the coalesced
   irq will be scaled to reflect the new period, however, it
   calculates the new interrupt number like this:
    s->irq_coalesced = (s->irq_coalesced * s->period) / period;

   Some clocks will be lost if they are not enough to be squeezed
   into a single new period, which will cause the VM clock to run
   slower

   In order to fix the issue, we calculate the interrupt window
   based on the precise clock rather than the period, so that the
   clocks lost while the period is scaled can be compensated properly

2) If periodic_timer_update() is called due to RegA reconfiguration,
   i.e. the period is updated, the current time is not the start point
   for the next periodic timer; instead, it should start from the
   last interrupt, otherwise the clock in the VM will become slow

   This patch takes the clocks from last interrupt to current clock
   into account and compensates the clocks for the next interrupt,
   especially if a complete interrupt was lost in this window, the
   time can be caught up by LOST_TICK_POLICY_SLEW

Signed-off-by: Tai Yunfang <yunfangtai@tencent.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Message-Id: <20170510083259.3900-3-xiaoguangrong@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Xiao Guangrong
9a6e2dcfdd mc146818rtc: update periodic timer only if it is needed
Currently, the timer is updated whenever RegA or RegB is written
even if the periodic timer related configuration is not changed

This patch optimizes it slightly to make the update happen only
if its period or enable-status is changed; later patches also
depend on this optimization

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Message-Id: <20170510083259.3900-2-xiaoguangrong@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 20:18:35 +02:00
Peter Maydell
65dfad62a1 Merge remote-tracking branch 'remotes/xtensa/tags/20170606-xtensa' into staging
target/xtensa fixes:

- fix read/write simcall mapping flags and return value;
- use -serial option to direct console output of sim machine to QEMU chardev;
- fix handling of unknown registers in the gdbstub.

# gpg: Signature made Tue 06 Jun 2017 11:46:05 BST
# gpg:                using RSA key 0x51F9CC91F83FA044
# gpg: Good signature from "Max Filippov <filippov@cadence.com>"
# gpg:                 aka "Max Filippov <max.filippov@cogentembedded.com>"
# gpg:                 aka "Max Filippov <jcmvbkbc@gmail.com>"
# Primary key fingerprint: 2B67 854B 98E5 327D CDEB  17D8 51F9 CC91 F83F A044

* remotes/xtensa/tags/20170606-xtensa:
  target/xtensa: handle unknown registers in gdbstub
  target/xtensa: support output to chardev console
  target/xtensa: fix return value of read/write simcalls
  target/xtensa: fix mapping direction in read/write simcalls

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-06 17:00:12 +01:00
Peter Maydell
572db7cd69 Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2017-06-06' into staging
Miscellaneous patches for 2017-06-06

# gpg: Signature made Tue 06 Jun 2017 08:30:43 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-misc-2017-06-06:
  monitor: fix object_del for command-line-created objects
  tests: check-qom-proplist: add checks for cmdline-created objects
  virtio-scsi-test: Use scsi-hd instead of legacy scsi-disk
  block: Clarify documentation of BlockInfo member io-status

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-06 15:37:54 +01:00
Peter Maydell
e02bbe1956 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170606' into staging
ppc patch queue 2017-06-06

Accumulated patches for ppc targets and the pseries machine type.

The big thing in this batch is a start on a substantial cleanup of the
pseries hotplug mechanisms, which were pretty confusing.  For now
these shouldn't cause substantial behavioural changes, but I am hoping
these lead to clearer code and eventually to fixes for the bugs we
have in hotplug handling, particularly when hotplug and migration are
combined.

The remaining patches are mostly bugfixes.

# gpg: Signature made Tue 06 Jun 2017 03:48:50 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.10-20170606:
  spapr: Remove some non-useful properties on DRC objects
  spapr: Eliminate spapr_drc_get_type_str()
  spapr: Move configure-connector state into DRC
  spapr: Clean up spapr_dr_connector_by_*()
  spapr: Introduce DRC subclasses
  spapr/drc: don't migrate DRC of cold-plugged CPUs and LMBs
  spapr: Allow boot from vhost-*-scsi backends
  ppc/pnv: check the return value of fdt_setprop()
  spapr_nvram: Check return value from blk_getlength()
  target/ppc: Fixup set_spr error in h_register_process_table
  target-ppc: Fix openpic timer read register offset
  spapr: Make DRC get_index and get_type methods into plain functions
  spapr: Abolish DRC set_configured method
  spapr: Abolish DRC get_fdt method
  spapr: Move DRC RTAS calls into spapr_drc.c
  migration: Mark CPU states dirty before incoming migration/loadvm
  migration: remove register_savevm()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-06 14:30:06 +01:00
Max Filippov
dd7b952b79 target/xtensa: handle unknown registers in gdbstub
Xtensa cores may have registers of types/sizes not supported by the
gdbstub accessors. Ignore writes to such registers and return zero on
read, but always return correct register size, so that gdb on the other
side is able to access all registers in the packet holding unsupported
registers in the middle. This fixes gdb interaction with cores that have
vector/custom TIE registers.

Cc: qemu-stable@nongnu.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2017-06-06 02:40:48 -07:00
Max Filippov
8128b3e079 target/xtensa: support output to chardev console
In semihosting mode QEMU allows guest to read and write host file
descriptors directly, including descriptors 0..2, a.k.a. stdin, stdout
and stderr. Sometimes it's desirable to have semihosting console
controlled by -serial option, e.g. to connect it to network.

Add semihosting console to xtensa-semi.c, open it in the 'sim' machine
in the presence of -serial option and direct stdout and stderr to it
when it's present.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2017-06-06 02:40:48 -07:00
Max Filippov
347ec03093 target/xtensa: fix return value of read/write simcalls
Return value of read/write simcalls is not calculated correctly in case
of operations crossing page boundary and in case of short reads/writes.
Read and write simcalls should return the size of data actually
read/written or -1 in case of error.

Cc: qemu-stable@nongnu.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2017-06-06 02:34:04 -07:00
Max Filippov
30c2afd151 target/xtensa: fix mapping direction in read/write simcalls
Read and write simcalls map physical memory to access I/O buffers, but
'read' simcall needs to map it for writing and 'write' simcall needs to
map it for reading, i.e. the opposite of what they do now. Fix that.

Cc: qemu-stable@nongnu.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2017-06-06 02:34:04 -07:00
Peter Maydell
a65afaae0f Merge remote-tracking branch 'remotes/ehabkost/tags/x86-and-machine-pull-request' into staging
x86 and machine queue, 2017-06-05

# gpg: Signature made Mon 05 Jun 2017 19:58:01 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-and-machine-pull-request:
  scripts: Test script to look for -device crashes
  qemu.py: Add QEMUMachine.exitcode() method
  qemu.py: Don't set _popen=None on error/shutdown
  spapr: cleanup spapr_fixup_cpu_numa_dt() usage
  numa: move numa_node from CPUState into target specific classes
  numa: make hmp 'info numa' fetch numa nodes from qmp_query_cpus() result
  numa: make sure that all cpus have has_node_id set if numa is enabled
  numa: move default mapping init to machine
  numa: consolidate cpu_preplug fixups/checks for pc/arm/spapr
  pc: Use "min-[x]level" on compat_props

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-06 10:00:34 +01:00
David Hildenbrand
fbe8202ea8 s390x/cpumodel: improve definition search without an IBC
Currently, under z/VM on a 0x2827, QEMU will detect a 0x2828 if no
IBC value is provided. QEMU will simply take the last model of that HW
generation, which happens to be the BC version.

Let's improve our search for that case by selecting the latest CPU
definition that matches the CPU type. This for example will avoid
detecting a z13 as a z13s.

We might still detect a GA2 version on a GA1 system, but as we don't
have further information at hand, there isn't too much we can do about
it. The alternative of always presenting the oldest GA is not backward
compatible, e.g:
You're running on 0x2827 GA2.
Old QEMU version indicated "0x2828 GA1 == 0x2827 GA2". After you updated
QEMU, you suddenly detect "0x2827 GA1". Your previous libvirt guest
might suddenly refuse to run.

In the end presenting a newer GA level does not matter because:

1: All GAX models share the same base feature set. A GAX++ might
support "more features".
2: Without an IBC, the guest can't detect the GA version.

If we have no IBC (esp. unblocked_ibc == 0), the IBC we will present
to the guest in read_SCP_info() will be 0. The guest will not know
which GA version it has. The problem of missing IBC propagates.

If we don't have a feature of the GA++ version, also our guest won't
have it. So in summary, the guest also has no idea of its GA version.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170531193434.6918-3-david@redhat.com>
Acked-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
[improve patch description by reusing mailing list discussion]
2017-06-06 10:50:40 +02:00
David Hildenbrand
64bc98f4b9 s390x/cpumodel: take care of the cpuid format bit for KVM
Let's also properly forward that bit. It should always be set. I
verified it under z/VM, it seems to be always set there. For now,
zKVM guests never get that bit set when the CPU model is active.

The PoP mentions that z800 + z900 (HW generation 7) always set this
bit to 0, so let's take care of that.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170531193434.6918-2-david@redhat.com>
Acked-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-06-06 10:50:40 +02:00
Greg Kurz
c68f4503e0 pc-bios/s390-ccw: use STRIP variable in Makefile
The docker-run-test-build@debian-s390x-cross target fails with:

strip --strip-unneeded s390-ccw.elf -o s390-ccw.img
strip: Unable to recognise the format of the input file `s390-ccw.elf'

The configure script defines a STRIP makefile variable whose default
value is ${cross_prefix}strip. Let's use it.

We default to using the non-prefixed strip command in case --enable-debug
or --disable-strip was passed to configure during a regular build.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <149623617700.4947.12490877660892961664.stgit@bahia.lan>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-06-06 10:50:40 +02:00
Cornelia Huck
4e19b57b0e s390x/css: fence off MIDA
MIDA (modified indirect data addressing) is an optional facility, and
we (currently) don't support it. Let's post an operand exception if
the guest tries to set it in the orb and a channel program check
if it is set in a ccw, as specified in the Principles of Operation.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-06-06 10:17:11 +02:00
Halil Pasic
8ed179c937 s390x/css: catch section mismatch on load
Prior to the virtio-ccw-2.7 machine (and commit 2a79eb1a), our virtio
devices residing under the virtual-css bus do not have qdev_path based
migration stream identifiers (because their qdev_path is NULL). The ids
are instead generated when the device is registered as a composition of
the so called idstr, which takes the vmsd name as its value, and an
instance_id, which is calculated as a maximal instance_id
registered with the same idstr plus one, or zero (if none was registered
previously).

That means, under certain circumstances, one device might try, and even
succeed, to load the state of a different device. This can lead to
trouble.

Let us fail the migration if the above problem is detected during load.

How to reproduce the problem:
1) start qemu-system-s390x making sure you have the following devices
   defined on your command line:
     -device virtio-rng-ccw,id=rng1,devno=fe.0.0001
     -device virtio-rng-ccw,id=rng2,devno=fe.0.0002
2) detach the devices and reattach in reverse order using the monitor:
     (qemu) device_del rng1
     (qemu) device_del rng2
     (qemu) device_add virtio-rng-ccw,id=rng2,devno=fe.0.0002
     (qemu) device_add virtio-rng-ccw,id=rng1,devno=fe.0.0001
3) save the state of the vm into a temporary file and quit QEMU:
     (qemu) migrate "exec:gzip -c > /tmp/tmp_vmstate.gz"
     (qemu) q
4) use your command line from step 1 with
     -incoming "exec:gzip -c -d /tmp/tmp_vmstate.gz"
   appended to reproduce the problem (while trying to load the saved vm)

CC: qemu-stable@nongnu.org
Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-Id: <20170518111405.56947-1-pasic@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-06-06 10:16:46 +02:00
Michael Roth
c645d5acee monitor: fix object_del for command-line-created objects
Currently objects specified on the command-line are only partially
cleaned up when 'object_del' is issued in either HMP or QMP: the
object itself is fully finalized, but the QemuOpts are not removed.
This results in the following behavior:

  x86_64-softmmu/qemu-system-x86_64 -monitor stdio \
    -object memory-backend-ram,id=ram1,size=256M

  QEMU 2.7.91 monitor - type 'help' for more information
  (qemu) object_del ram1
  (qemu) object_del ram1
  object 'ram1' not found
  (qemu) object_add memory-backend-ram,id=ram1,size=256M
  Duplicate ID 'ram1' for object
  Try "help object_add" for more information

which can be an issue for use-cases like memory hotplug.

This happens on the HMP side because hmp_object_add() attempts to
create a temporary QemuOpts entry with ID 'ram1', which ends up
conflicting with the command-line-created entry, since it was never
cleaned up during the previous hmp_object_del() call.

We address this by adding a check in user_creatable_del(), which
is called by both qmp_object_del() and hmp_object_del() to handle
the actual object cleanup, to determine whether an option group entry
matching the object's ID is present and removing it if it is.

Note that qmp_object_add() never attempts to create a temporary
QemuOpts entry, so it does not encounter the duplicate ID error,
which is why this isn't generally visible in libvirt.

Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Cc: Daniel Berrange <berrange@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1496531612-22166-3-git-send-email-mdroth@linux.vnet.ibm.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-06 09:29:46 +02:00
Michael Roth
a1af255f06 tests: check-qom-proplist: add checks for cmdline-created objects
check-qom-proplist originally added tests for verifying that
object-creation helpers object_new_with_{props,propv} behaved in
similar fashion to the "traditional" method involving setting each
individual property separately after object creation rather than
via a single call.

Another similar "helper" for creating Objects exists in the form of
objects specified via -object command-line parameters. By that
rationale, we extend check-qom-proplist to include similar checks
for command-line-created objects by employing the same
qemu_opts_parse()-based parsing the vl.c employs.

This parser has a side-effect of parsing the object's options into
a QemuOpt structure and registering this in the global QemuOptsList
using the Object's ID. This can conflict with future Object instances
that attempt to use the same ID if we don't ensure this is cleaned
up as part of Object finalization, so we include a FIXME stub to test
for this case, which will then be resolved in a subsequent patch.

Suggested-by: Daniel Berrange <berrange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Cc: Daniel Berrange <berrange@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1496531612-22166-2-git-send-email-mdroth@linux.vnet.ibm.com>
[Comment formatting tidied up]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-06-06 09:29:04 +02:00
Markus Armbruster
8ee47a886f virtio-scsi-test: Use scsi-hd instead of legacy scsi-disk
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1494327362-30727-3-git-send-email-armbru@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 08:46:21 +02:00
Markus Armbruster
f6f55affd1 block: Clarify documentation of BlockInfo member io-status
Say "SCSI except scsi-generic" instead of "scsi-disk", because
scsi-disk could mean either scsi-disk.c (which is correct) or device
model scsi-disk (which would be incorrect).

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1494327362-30727-2-git-send-email-armbru@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2017-06-06 08:46:12 +02:00
David Gibson
91dcb1ffa6 spapr: Remove some non-useful properties on DRC objects
* 'connector_type' is easily derived from the 'index' property, so there's
   no point to it (it's also implicit in the QOM type of the DRC)
 * 'isolation-state', 'indicator-state' and 'allocation-state' are
   part of the transaction between qemu and guest during PAPR hotplug
   operations, and outside tools really have no business looking at it
   (especially not changing, and these were RW properties)
 * 'entity-sense' is basically just a weird PAPR encoding of whether there
   is a device connected to this DRC

Strictly speaking removing these properties is breaking the qemu interface.
However, I'm pretty sure no management tools have ever used these.  For
debugging there are better alternatives.  Therefore, I think removing these
broken interfaces is the better option.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-06 09:24:25 +10:00
David Gibson
1693ea1685 spapr: Eliminate spapr_drc_get_type_str()
This function was used in generating the device tree.  However, now that
we have different QOM types for different DRC types we can easily store
the information we need in the class structure and avoid this specialized
lookup function.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-06 09:24:21 +10:00
David Gibson
b8fdd530be spapr: Move configure-connector state into DRC
Currently the sPAPRMachineState contains a list of sPAPRConfigureConnector
structures which store intermediate state for the ibm,configure-connector
RTAS call.

This was an attempt to separate this state from the core of the DRC state.
However the configure connector process is intimately tied to the DRC
model, so there's really no point trying to have two levels of interface
here.

Moving the configure-connector state into its corresponding DRC allows
removal of a number of helpers for maintaining the ancillary list.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-06 09:24:17 +10:00
David Gibson
fbf5539718 spapr: Clean up spapr_dr_connector_by_*()
* Change names to something less ludicrously verbose
 * Now that we have QOM subclasses for the different DRC types, use a QOM
   typename instead of a PAPR type value parameter

The latter allows removal of the get_type_shift() helper.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-06 09:24:08 +10:00
David Gibson
2d33581899 spapr: Introduce DRC subclasses
Currently we only have a single QOM type for all DRCs, but lots of
places where we switch behaviour based on the DRC's PAPR defined type.
This is a poor use of our existing type system.

So, instead create QOM subclasses for each PAPR defined DRC type.  We
also introduce intermediate subclasses for physical and logical DRCs,
a division which will be useful later on.

Instead of being stored in the DRC object itself, the PAPR type is now
stored in the class structure.  There are still many places where we
switch directly on the PAPR type value, but this at least provides the
basis to start to remove those.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Acked-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-06-06 09:23:46 +10:00
Greg Kurz
a32e900b8a spapr/drc: don't migrate DRC of cold-plugged CPUs and LMBs
As explained in commit 5c0139a8c2 ("spapr: fix default DRC state for
coldplugged LMBs"), guests expect cold-plugged LMBs to be pre-allocated
and unisolated. The same goes for cold-plugged CPUs.

While here, let's convert g_assert(false) to the better self documenting
g_assert_not_reached().

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-06 09:22:02 +10:00
Felipe Franciosi
c4e13492af spapr: Allow boot from vhost-*-scsi backends
The current implementation of spapr_get_fw_dev_path() doesn't take into
consideration vhost-*-scsi devices. This makes said devices unbootable
on PPC as SLOF is unable to work out the path to scan boot disks.

This makes VMs bootable on spapr when using vhost-*-scsi by implementing
a disk path for VHostSCSICommon (which currently includes both
vhost-user-scsi and vhost-scsi).

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Signed-off-by: Mike Cui <cui@nutanix.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-06 09:19:01 +10:00
Cédric Le Goater
7032d92ac8 ppc/pnv: check the return value of fdt_setprop()
Signed-off-by: Cédric Le Goater <clg@kaod.org>
[dwg: Correct typo in commit message]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-06 09:18:46 +10:00
Peter Maydell
0524951788 spapr_nvram: Check return value from blk_getlength()
The blk_getlength() function can return an error value if the
image size cannot be determined. Check for this rather than
ploughing on and trying to g_malloc0() a negative number.
(Spotted by Coverity, CID 1288484.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-06 09:18:32 +10:00
Suraj Jitindar Singh
60694bc678 target/ppc: Fixup set_spr error in h_register_process_table
set_spr is used in the function h_register_process_table() to update the
LPCR_GTSE and LPCR_UPRT values based on the flags passed by the guest.
The set_spr function takes the last two arguments mask and value used to
mask and set the value of the spr respectively.

The current call site passes these arguments in the wrong order and thus
both GTSE and UPRT will be set irrespective, which is obviously
incorrect.

Rearrange the function call so that these arguments are passed in the
correct order and the correct behaviour is exhibited.

It is worth noting that this wasn't detected earlier since these were
always both set in all cases where this H_CALL was made.

Fixes: 6de833070c ("target/ppc: Set UPRT and GTSE on all cpus in H_REGISTER_PROCESS_TABLE")

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-06 08:53:24 +10:00
Aaron Larson
a09f7443bc target-ppc: Fix openpic timer read register offset
openpic_tmr_read() is incorrectly computing register offset of the
TCCR, TBCR, TVPR, and TDR registers when accessing the open pic timer
registers.  Specifically the offset of timer registers for
openpic_tmr_read() is not accounting for the timer frequency reporting
register (TFFR) which is the first register in the "tmr" memory
region.

openpic_tmr_write() *is* correctly computing the offset by adding
0x10f0 to the address prior to computing the register index.  This
patch instead subtracts 0x10 in both the read and write routines and
eliminates some other gratuitous differences between the functions.

Signed-off-by: Aaron Larson <alarson@ddci.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-06 08:53:24 +10:00
David Gibson
0b55aa91c9 spapr: Make DRC get_index and get_type methods into plain functions
These two methods only have one implementation, and the spec they're
implementing means any other implementation is unlikely, verging on
impossible.

So replace them with simple functions.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-06 08:53:24 +10:00
David Gibson
4f65ce00ab spapr: Abolish DRC set_configured method
DRConnectorClass has a set_configured method, however:
  * There is only one implementation, and only ever likely to be one
  * There's exactly one caller, and that's (now) local
  * The implementation is very straightforward

So abolish the method entirely, and just open-code what we need.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-06 08:53:24 +10:00
David Gibson
88af6ea568 spapr: Abolish DRC get_fdt method
The DRConnectorClass includes a get_fdt method.  However
  * There's only one implementation, and there's only likely to ever be one
  * Both callers are local to spapr_drc
  * Each caller only uses one half of the actual implementation

So abolish get_fdt() entirely, and just open-code what we need.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-06 08:53:24 +10:00
David Gibson
b89b3d3929 spapr: Move DRC RTAS calls into spapr_drc.c
Currently implementations of the RTAS calls related to DRCs are in
spapr_rtas.c.  They belong better in spapr_drc.c - that way they're closer
to related code, and we'll be able to make some more things local.

spapr_rtas.c was intended to contain the RTAS infrastructure and core calls
that don't belong anywhere else, not every RTAS implementation.

Code motion only.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-06 08:53:24 +10:00
David Gibson
75e972dab5 migration: Mark CPU states dirty before incoming migration/loadvm
As a rule, CPU internal state should never be updated when
!cpu->kvm_vcpu_dirty (or the HAX equivalent).  If that is done, then
subsequent calls to cpu_synchronize_state() - usually safe and idempotent -
will clobber state.

However, we routinely do this during a loadvm or incoming migration.
Usually this is called shortly after a reset, which will clear all the cpu
dirty flags with cpu_synchronize_all_post_reset().  Nothing is expected
to set the dirty flags again before the cpu state is loaded from the
incoming stream.

This means that it isn't safe to call cpu_synchronize_state() from a
post_load handler, which is non-obvious and potentially inconvenient.

We could cpu_synchronize_all_state() before the loadvm, but that would be
overkill since a) we expect the state to already be synchronized from the
reset and b) we expect to completely rewrite the state with a call to
cpu_synchronize_all_post_init() at the end of qemu_loadvm_state().

To clear this up, this patch introduces cpu_synchronize_pre_loadvm() and
associated helpers, which simply marks the cpu state as dirty without
actually changing anything.  i.e. it says we want to discard any existing
KVM (or HAX) state and replace it with what we're going to load.

Cc: Juan Quintela <quintela@redhat.com>
Cc: Dave Gilbert <dgilbert@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Juan Quintela <quintela@redhat.com>
2017-06-06 08:53:24 +10:00
Laurent Vivier
1b6e748246 migration: remove register_savevm()
We can replace the four remaining calls of register_savevm() by
calls to register_savevm_live(). So we can remove the function and
as we don't allocate anymore the ops pointer with g_new0()
we don't have to free it then.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-06-06 08:53:24 +10:00
Eduardo Habkost
23ea4f3032 scripts: Test script to look for -device crashes
Test code to check if we can crash QEMU using -device. It will
test all accel/machine/device combinations by default, which may
take a few hours (it's more than 90k test cases). There's a "-r"
option that makes it test a random sample of combinations.

The script contains a whitelist for: 1) known error messages
that make QEMU exit cleanly; 2) known QEMU crashes.

This is the behavior when the script finds a failure:

* Known clean (exitcode=1) errors generate DEBUG messages
  (hidden by default)
* Unknown clean (exitcode=1) errors will generate INFO messages
  (visible by default)
* Known crashes generate error messages, but are not fatal
  (unless --strict mode is used)
* Unknown crashes generate fatal error messages

Having an updated whitelist of known clean errors is useful to make the
script less verbose and run faster when in --quick mode, but the
whitelist doesn't need to be always up to date.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170526181200.17227-4-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:09 -03:00
Eduardo Habkost
b2b8d98675 qemu.py: Add QEMUMachine.exitcode() method
Allow the exit code of QEMU to be queried by scripts.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170526181200.17227-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:09 -03:00
Eduardo Habkost
37bbcd5757 qemu.py: Don't set _popen=None on error/shutdown
Keep the Popen object around so we can query its exit code later.

To keep the existing 'self._popen is None' checks working, add a
is_running() method, that will check if the process is still running.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170526181200.17227-2-ehabkost@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:09 -03:00
Igor Mammedov
99861ecbc5 spapr: cleanup spapr_fixup_cpu_numa_dt() usage
even though spapr_fixup_cpu_numa_dt() has no effect on FDT
if numa is disabled, don't call it uselessly. It makes it
obvious at call sites that function is needed only when numa
is enabled.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1496161442-96665-7-git-send-email-imammedo@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:09 -03:00
Igor Mammedov
15f8b14228 numa: move numa_node from CPUState into target specific classes
Move vcpu's associated numa_node field out of generic CPUState
into inherited classes that actually care about cpu<->numa mapping,
i.e: ARMCPU, PowerPCCPU, X86CPU.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1496161442-96665-6-git-send-email-imammedo@redhat.com>
[ehabkost: s/CPU is belonging to/CPU belongs to/ on comments]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:09 -03:00
Igor Mammedov
f75cd44de0 numa: make hmp 'info numa' fetch numa nodes from qmp_query_cpus() result
HMP command 'info numa' is the last external user that access
CPUState::numa_node field directly. In order to move it to CPU
classes that actually use it, eliminate direct access and use
an alternative approach by using result of qmp_query_cpus(),
which provides topology properties CPU threads are associated
with (including node-id).

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1496161442-96665-5-git-send-email-imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:08 -03:00
Igor Mammedov
d41f3e750d numa: make sure that all cpus have has_node_id set if numa is enabled
It fixes/adds missing _PXM object for non mapped CPU (x86)
and missing fdt node (virt-arm).

It ensures that possible_cpus contains complete mapping if
numa is enabled by the time machine_init() is executed.

As result non completely mapped CPUs:
 1) appear in ACPI/fdt blobs
 2) QMP query-hotpluggable-cpus command shows bound nodes for such CPUs
 3) allows to drop checks for has_node_id in numa only code,
   reducing number of invariants incomplete mapping could produce
 4) moves fixup/implicit node init from runtime numa_cpu_pre_plug()
   (when CPU object is created) to machine_numa_finish_init() which
   helps to fix [1, 2] and make possible_cpus complete source
   of numa mapping available even before CPUs are created.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1496161442-96665-4-git-send-email-imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:08 -03:00
Igor Mammedov
60bed6a30a numa: move default mapping init to machine
there is no need to use cpu_index_to_instance_props() for setting
default cpu -> node mapping. Generic machine code can do it
without cpu_index by just enabling already preset defaults
in possible_cpus.

PS:
as bonus it makes one less user of cpu_index_to_instance_props()

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1496161442-96665-3-git-send-email-imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:08 -03:00
Igor Mammedov
a0ceb640d0 numa: consolidate cpu_preplug fixups/checks for pc/arm/spapr
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <1496161442-96665-2-git-send-email-imammedo@redhat.com>
[ehabkost: Fix indentation]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:08 -03:00
Eduardo Habkost
1f43571604 pc: Use "min-[x]level" on compat_props
Since the automatic cpuid-level code was introduced in commit
c39c0edf9b ("target-i386: Automatically
set level/xlevel/xlevel2 when needed"), the CPU model tables just define
the default CPUID level code (set using "min-level").  Setting
"[x]level" forces CPUID level to a specific value and disable the
automatic-level logic.

But the PC compat code was not updated and the existing "[x]level"
compat properties broke compatibility for people using features that
triggered the auto-level code.  To keep previous behavior, we should set
"min-[x]level" instead of "[x]level" on compat_props.

This was not a problem for most cases, because old machine-types don't
have full-cpuid-auto-level enabled.  The only common use case it broke
was the CPUID[7] auto-level code, that was already enabled since the
first CPUID[7] feature was introduced (in QEMU 1.4.0).

This causes the regression reported at:
https://bugzilla.redhat.com/show_bug.cgi?id=1454641

Change the PC compat code to use "min-[x]level" instead of "[x]level" on
compat_props, and add new test cases to ensure we don't break this
again.

Reported-by: "Guo, Zhiyi" <zhguo@redhat.com>
Fixes: c39c0edf9b ("target-i386: Automatically set level/xlevel/xlevel2 when needed")
Cc: qemu-stable@nongnu.org
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-06-05 14:59:08 -03:00
Peter Maydell
a0d4aac746 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170605' into staging
Queued TCG patches

# gpg: Signature made Mon 05 Jun 2017 17:48:42 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170605: (26 commits)
  target/alpha: Use goto_tb for fallthru between TBs
  target/alpha: Implement WTINT inline
  target/mips: optimize indirect branches
  target/mips: optimize cross-page direct jumps in softmmu
  target/aarch64: optimize indirect branches
  target/aarch64: optimize cross-page direct jumps in softmmu
  target/hppa: Use tcg_gen_lookup_and_goto_ptr
  target/s390: Use tcg_gen_lookup_and_goto_ptr
  tcg/mips: implement goto_ptr
  tcg/arm: Implement goto_ptr
  tcg/arm: Clarify tcg_out_bx for arm4 host
  tcg/s390: Implement goto_ptr
  tcg/sparc: Implement goto_ptr
  tcg/aarch64: Implement goto_ptr
  tcg/ppc: Implement goto_ptr
  tb-hash: improve tb_jmp_cache hash function in user mode
  target/i386: optimize indirect branches
  target/i386: optimize cross-page direct jumps in softmmu
  target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr
  target/arm: optimize indirect branches
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-05 18:03:43 +01:00
Richard Henderson
2d826cdc8a target/alpha: Use goto_tb for fallthru between TBs
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
bec5e2b975 target/alpha: Implement WTINT inline
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Aurelien Jarno
e350d8ca3a target/mips: optimize indirect branches
Cc: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170430145254.25616-4-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Aurelien Jarno
d9a9acde64 target/mips: optimize cross-page direct jumps in softmmu
Cc: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170430145254.25616-3-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
e75449a346 target/aarch64: optimize indirect branches
Measurements:

[Baseline is the performance before applying this commit and the previous one]

-                                    NBench, aarch64-softmmu. Host: Intel i7-4790K @ 4.00GHz

 1.7x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                                                                  |
      |   cross                                                                                                          |
 1.6x +cross+jr.................................................####...................................................+-+
      |                                                         #++#                                                     |
      |                                                         #  #                                                     |
 1.5x +-+...................................................*****..#...................................................+-+
      |                                                     *+++*  #                                                     |
      |                                                     *   *  #                                                     |
 1.4x +-+...................................................*...*..#...................................................+-+
      |                                                     *   *  #                                                     |
      |                                     #####           *   *  #                                                     |
 1.3x +-+................................****+++#...........*...*..#...................................................+-+
      |                                  *++*   #           *   *  #                                                     |
      |                                  *  *   #           *   *  #                                                     |
 1.2x +-+................................*..*...#...........*...*..#...................................................+-+
      |                                  *  *   #           *   *  #                                                     |
      |                            ####  *  *   #           *   *  #                                                     |
 1.1x +-+.......................+++#..#..*..*...#...........*...*..#...................................................+-+
      |                         ****  #  *  *   #           *   *  #                                        ****####     |
      |                         *  *  #  *  *   #           *   *  #  ****###   +++####            ****###  *  *   #     |
   1x +-++-++++++-++++****###++-*++*++#++*++*+-+#++****+++++*+++*++#++*++*-+#++*****++#++****###-++*++*-+#++*+-*+++#+-++-+
      |     *****###  *  *  #   *  *  #  *  *   #  *++*###  *   *  #  *  *  #  *   *  #  *  *++#   *  *  #  *  *   #     |
      |     *   *++#  *  *  #   *  *  #  *  *   #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #   *  *  #  *  *   #     |
 0.9x +-+---*****###--****###---****###--****####--****###--*****###--****###--*****###--****###---****###--****####---+-+
      ASSIGNMENT  BITFIELD  FOURIER  FP EMULATION  HUFFMAN  IDEA  LU DECOMPOSITION  NEURAL NET  NUMERIC SORT  STRING SORT  hmean
  png: http://imgur.com/qO9ubtk
NB. cross here represents the previous commit.

-                            SPECint06 (test set), aarch64-linux-user. Host: Intel i7-4790K @ 4.00GHz

 1.5x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                       *****                                      |
      |                                                                       *+++*                           jr         |
      |                                                                       *   *                                      |
 1.4x +-+.....................................................................*...*.....................+++............+-+
      |                                                                       *   *                      |               |
      |                                      *****                            *   *                      |               |
      |                                      *   *                            *   *                    *****             |
 1.3x +-+....................................*...*............................*...*....................*.|.*...........+-+
      |                       +++            *   *                            *   *                    * | *             |
      |                      *****           *   *                            *   *                    *+++*             |
      |                      *   *           *   *                            *   *                    *   *             |
 1.2x +-+....................*...*...........*...*............................*...*...........*****....*...*...........+-+
      |     *****            *   *           *   *                            *   *           *   *    *   *    +++      |
      |     *   *            *   *           *   *                            *   *           *   *    *   *   *****     |
      |     *   *            *   *   *****   *   *                            *   *           *   *    *   *   *   *     |
 1.1x +-+...*...*............*...*...*...*...*...*............................*...*....+++....*...*....*...*...*...*...+-+
      |     *   *            *   *   *   *   *   *                            *   *   *****   *   *    *   *   *   *     |
      |     *   *            *   *   *   *   *   *   *****                    *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *****    *   *   *   *   *   *   *   *   ******           *   *   *   *   *   *    *   *   *   *     |
   1x +-++-+*+++*-++*+++*++++*+-+*+++*-++*+++*-++*+++*+++*++-*++++*-++*****+++*++-*+++*++-*+++*+-+*++++*+++*++-*+++*+-++-+
      |     *   *   *   *    *   *   *   *   *   *   *   *   *    *   *+++*   *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *     |
 0.9x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+
         astar  bzip2  gcc  gobmk  h264ref  hmmer  libquantum  mcf  omnetpp  perlbench  sjeng  xalancbmk  hmean
  png: http://imgur.com/3Dp4vvq

-                           SPECint06 (train set), aarch64-linux-user. Host: Intel i7-4790K @ 4.00GHz

 1.7x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                                                                  |
      |                                                                                                       jr         |
 1.6x +-+...............................................................................................+++............+-+
      |                                                                                                *****             |
      |                                                                                                *+++*             |
      |                                                                                                *   *             |
 1.5x +-+..............................................................................................*...*...........+-+
      |                                                                        +++                     *   *             |
      |                                                                       *****                    *   *             |
 1.4x +-+.....................................................................*+++*....................*...*...........+-+
      |                                                                       *   *                    *   *             |
      |                                      *****                            *   *                    *   *             |
      |                                      *   *                            *   *   *****            *   *             |
 1.3x +-+....................................*...*............................*...*...*...*............*...*...........+-+
      |                       +++            *   *                            *   *   *   *            *   *             |
      |                      *****           *   *                            *   *   *   *   *****    *   *             |
 1.2x +-+....................*...*...........*...*............................*...*...*...*...*+++*....*...*...*****...+-+
      |                      *   *           *   *                            *   *   *   *   *   *    *   *   *+++*     |
      |     *****            *   *   *****   *   *                            *   *   *   *   *   *    *   *   *   *     |
      |     *   *            *   *   *+++*   *   *                            *   *   *   *   *   *    *   *   *   *     |
 1.1x +-+...*...*............*...*...*...*...*...*............................*...*...*...*...*...*....*...*...*...*...+-+
      |     *   *   *****    *   *   *   *   *   *                    *****   *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *   *    *   *   *   *   *   *    +++    ******   *+++*   *   *   *   *   *   *    *   *   *   *     |
   1x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+
         astar  bzip2  gcc  gobmk  h264ref  hmmer  libquantum  mcf  omnetpp  perlbench  sjeng  xalancbmk  hmean
  png: http://imgur.com/vRrdc9j

Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
e78722368c target/aarch64: optimize cross-page direct jumps in softmmu
Perf numbers are in the next commit's log.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
4137cb83fa target/hppa: Use tcg_gen_lookup_and_goto_ptr
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
6350001e83 target/s390: Use tcg_gen_lookup_and_goto_ptr
Tested-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Aurelien Jarno
5786e0683c tcg/mips: implement goto_ptr
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170430145254.25616-2-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
085c648bef tcg/arm: Implement goto_ptr
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
702a947484 tcg/arm: Clarify tcg_out_bx for arm4 host
In theory this would re-enable usage of QEMU on an armv4 host.
Whether this is worthwhile is debatable -- we've been unconditionally
issuing the armv5t BX instruction in the prologue since 2011 without
complaint.  Possibly we should simply require an armv6 host.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
46644483ca tcg/s390: Implement goto_ptr
Tested-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
38f81dc593 tcg/sparc: Implement goto_ptr
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
b19f0c2e7d tcg/aarch64: Implement goto_ptr
Measurements:

                      SPECint06 (test set), x86_64-linux-user. Host: APM 64-bit ARMv8 (Atlas/A57) @ 2.4 GHz

 1.45x +-+-------------------------------------------------------------------------------------------------------------+-+
       |                                      *****                                                                      |
       |      +++                             *   *                                                    +goto-ptr         |
  1.4x +-+...*****............................*...*....................................................................+-+
       |     *+++*                            *   *                            +++                                       |
 1.35x +-+...*...*............................*...*...........................*****....................................+-+
       |     *   *                            *   *                           *+++*                                      |
       |     *   *                            *   *                           *   *                                      |
  1.3x +-+...*...*............................*...*...........................*...*....................................+-+
       |     *   *                            *   *                           *   *                                      |
       |     *   *                            *   *                           *   *                    *****             |
 1.25x +-+...*...*...........*****............*...*...........................*...*............*****...*...*...........+-+
       |     *   *           *   *            *   *                           *   *            *+++*   *   *             |
  1.2x +-+...*...*...........*...*............*...*...........................*...*............*...*...*...*...........+-+
       |     *   *           *   *            *   *                           *   *            *   *   *   *             |
       |     *   *           *   *            *   *                           *   *            *   *   *   *   *****     |
 1.15x +-+...*...*...........*...*............*...*...........................*...*............*...*...*...*...*...*...+-+
       |     *   *           *   *            *   *                           *   *    +++     *   *   *   *   *   *     |
       |     *   *           *   *            *   *                           *   *   *****    *   *   *   *   *   *     |
  1.1x +-+...*...*...........*...*....*****...*...*...*****...................*...*...*...*....*...*...*...*...*...*...+-+
       |     *   *           *   *    *   *   *   *   *   *                   *   *   *   *    *   *   *   *   *   *     |
 1.05x +-+...*...*...........*...*....*...*...*...*...*...*...................*...*...*...*....*...*...*...*...*...*...+-+
       |     *   *   *****   *   *    *   *   *   *   *   *                   *   *   *   *    *   *   *   *   *   *     |
       |     *   *   *   *   *   *    *   *   *   *   *   *   *****   *****   *   *   *   *    *   *   *   *   *   *     |
    1x +-+---*****---*****---*****----*****---*****---*****---*****---*****---*****---*****----*****---*****---*****---+-+
          astar  bzip2  gcc  gobmk  h264ref  hmmer  libquantum  mcf  omnetpp  perlbench  sjeng  xalancbmk  hmean
  png: http://imgur.com/en9HE8L

Tested-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
0c240785a8 tcg/ppc: Implement goto_ptr
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
6f1653180f tb-hash: improve tb_jmp_cache hash function in user mode
Optimizations to cross-page chaining and indirect branches make
performance more sensitive to the hit rate of tb_jmp_cache.
The constraint of reserving some bits for the page number
lowers the achievable quality of the hashing function.

However, user-mode does not have this requirement. Thus, with this
change, user-mode uses a hashing function that is both faster and
of better quality than the previous one.

Measurements:

Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0.

-                           SPECint06 (test set), x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz

 2.2x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                                                                  |
      |         jr                                                                                                       |
   2x +jr+multhash        +....................................................+++++...................................+-+
      |    jr+hash                                                              |$$$                                     |
      |                                                                         |$+$                                     |
      |                                                                        ### $                                     |
 1.8x +-+......................................................................#|#.$...................................+-+
      |                                                                      ++#+# $                                     |
      |                                                                       |# # $                                     |
 1.6x +-+....................................................................***.#.$....................++$$$..........+-+
      |                                         $$$                          *+* # $                     |$+$            |
      |                       ++$$$           ### $                          * * # $                  +++|$ $            |
      |                     ++###+$           # # $                          * * # $           ###   ****## $            |
 1.4x +-+...................***+#.$.........***.#.$..........................*.*.#.$...........#+#$$.*++*|#.$..........+-+
      |                     *+* # $         * * # $                          * * # $           # # $ *  *+# $            |
      |                     * * # $   +++++ * * # $                          * * # $         *** # $ *  * # $   ###$$    |
 1.2x +-+...................*.*.#.$.***##$$.*.*.#.$..........................*.*.#.$.........*.*.#.$.*..*.#.$.***+#+$..+-+
      |                     * * # $ *+* # $ * * # $   +++                    * * # $ ++###$$ * * # $ *  * # $ * * # $    |
      |    ***##$$          * * # $ * * # $ * * # $ ***##$$          ++###   * * # $ *** #+$ * * # $ *  * # $ * * # $    |
      |    *+*+#+$ ***##$$$ * * # $ * * # $ * * # $ *+* # $ ++####$$ ***+#   * * # $ * * # $ * * # $ *  * # $ * * # $    |
   1x +-++-*+*+#+$+*+*+#-+$+*+*-#+$+*+*+#+$+*+*+#+$+*-*+#+$+***++#+$+*+*+#$$+*+*+#+$+*+*+#+$+*+*-#+$+*+-*+#+$+*+*+#+$-++-+
      |    * * # $ * * #  $ * * # $ * * # $ * * # $ * * # $ * *  # $ * * # $ * * # $ * * # $ * * # $ *  * # $ * * # $    |
      |    * * # $ * * #  $ * * # $ * * # $ * * # $ * * # $ * *  # $ * * # $ * * # $ * * # $ * * # $ *  * # $ * * # $    |
 0.8x +-+--***##$$-***##$$$-***##$$-***##$$-***##$$-***##$$-***###$$-***##$$-***##$$-***##$$-***##$$-****##$$-***##$$--+-+
         astar  bzip2  gcc  gobmk  h264ref  hmmer  libquantum  mcf  omnetpp  perlbench  sjeng  xalancbmk  hmean
  png: http://imgur.com/4UXTrEc

Here I also tried the hash function suggested by Paolo ("multhash"):

  return ((uint64_t) (pc * 2654435761) >> 32) & (TB_JMP_CACHE_SIZE - 1);

As you can see it is just as good as the other new function ("hash"),
which is what I ended up going with.

-                          SPECint06 (train set), x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz

 2.6x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                                                                  |
      |     jr                                                                                           ###             |
 2.4x +jr+hash...........................................................................................#.#...........+-+
      |                                                                                                  # #             |
      |                                                                                                  # #             |
 2.2x +-+................................................................................................#.#...........+-+
      |                                                                                                  # #             |
      |                                                                                                  # #             |
   2x +-+................................................................................................#.#...........+-+
      |                                                                                               **** #             |
      |                                                                                               *  * #             |
 1.8x +-+.............................................................................................*..*.#...........+-+
      |                                                                         +++                   *  * #             |
      |                                                                         ####    ####          *  * #             |
 1.6x +-+......................................####.............................#..#.****..#..........*..*.#...........+-+
      |                        +++             #++#                          ****  # *  *  #    ####  *  * #             |
      |                        ###             #  #                          *  *  # *  *  #    #  #  *  * #             |
 1.4x +-+...................****+#..........****..#..........................*..*..#.*..*..#....#..#..*..*.#...........+-+
      |                     *++* #          *  *  #                          *  *  # *  *  #  ***  #  *  * #     ####    |
      |                     *  * #     #### *  *  #                          *  *  # *  *  #  * *  #  *  * #  ****  #    |
 1.2x +-+...................*..*.#..****++#.*..*..#..........................*..*..#.*..*..#..*.*..#..*..*.#..*..*..#..+-+
      |    ****###          *  * #  *  *  # *  *  #                          *  *  # *  *  #  * *  #  *  * #  *  *  #    |
      |    *  *  #  ***###  *  * #  *  *  # *  *  #                  ****##  *  *  # *  *  #  * *  #  *  * #  *  *  #    |
   1x +-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+
         astar  bzip2  gcc  gobmk  h264ref  hmmer  libquantum  mcf  omnetpp  perlbench  sjeng  xalancbmk  hmean
  png: http://imgur.com/ArCbHqo

-                                    NBench, x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz

 1.12x +-+-------------------------------------------------------------------------------------------------------------+-+
       |                                                                                                                 |
       |     jr                                                           +++                                            |
  1.1x +jr+hash...........................................................####.........................................+-+
       |                                                               +++#| #                                           |
       |                                                                | #++#                                           |
 1.08x +-+................................+++................+++.+++..*****..#.........................................+-+
       |                                   |  +++             |   |   * | *  #                                           |
       |                                   |   |              |   |   *+++*  #                                           |
 1.06x +-+................................****###.............|...|...*...*..#.........................+++.............+-+
       |                                  *| * |#            ****###  *   *  #                          |                |
       |                                  *| *++#            *| * |#  *   *  #                        ####               |
 1.04x +-+................................*++*..#............*|.*.|#..*...*..#........................#.|#.............+-+
       |                                  *  *  #            *++*++#  *   *  #                     +++#++#               |
       |                                  *  *  #            *  *  #  *   *  #                      | #  #   +++####     |
 1.02x +-+................................*..*..#......+++...*..*..#..*...*..#.....................****..#..*****++#...+-+
       |         +++                      *  *  #   +++ |    *  *  #  *   *  #  +++                *| *  #  *+++*  #     |
       |      +++ |    +++ +++   ++++++   *  *  #  *****###  *  *  #  *   *  #   |  +++   ++++++   *++*  #  *   *  #     |
    1x +-++-+++++####++****###++++-+####+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-+++####-+*****###++*++*++#++*+-+*++#+-++-+
       |     *****| #  *++* |#  *****| #  *  *  #  *   *++#  *  *  #  *   *  #  **** |#  *   *  #  *  *  #  *   *  #     |
       |     * | *| #  *  *++#  * | *++#  *  *  #  *   *  #  *  *  #  *   *  #  *| *++#  *   *  #  *  *  #  *   *  #     |
 0.98x +-+...*.|.*++#..*..*..#..*+++*..#..*..*..#..*...*..#..*..*..#..*...*..#..*++*..#..*...*..#..*..*..#..*...*..#...+-+
       |     *+++*  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #     |
       |     *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #     |
 0.96x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+
       ASSIGNMENT  BITFIELD  FOURIER  FP EMULATION  HUFFMAN  IDEA  LU DECOMPOSITION  NEURAL NET  NUMERIC SORT  STRING SORT  hmean
  png: http://imgur.com/ZXFX0hJ

-                                   NBench, arm-linux-user. Host: Intel i7-4790K @ 4.00GHz

  1.3x +-+-------------------------------------------------------------------------------------------------------------+-+
       |                            ####                                                                                 |
       |     jr                     #  #                                            +++                                  |
 1.25x +jr+hash.....................#..#...........................................####................................+-+
       |                            #  #                                           #  #                                  |
       |                            #  #                                           #  #                                  |
  1.2x +-+..........................#..#...........................................#..#................................+-+
       |                            #  #                                           #  #                                  |
       |                            #  #                                           #  #                                  |
 1.15x +-+..........................#..#...........................................#..#................................+-+
       |                            #  #                                  ####     #  #                                  |
       |                            #  #                                  #  #     #  #                                  |
  1.1x +-+..........................#..#..................................#..#.....#..#................................+-+
       |                            #  #                                  #  #     #  #                         +++      |
       |                            #  #               ####               #  #     #  #                         ####     |
 1.05x +-+..........................#..#...............#..#.....####......#..#.....#..#.........................#..#...+-+
       |                            #  #               #  #     #  #      #  #     #  #                +++      #  #     |
       |                   +++  *****  #     ####  *****  #     #  #   +++#  #  ****  #            ****###      #  #     |
    1x +-++-+*****###++****+++++*+-+*++#+-****++#-+*+++*-+#+++++#++#++*****++#+-*++*++#-+*****-++++*++*++#++*****++#+-++-+
       |     *   *  #  *  * |   *   *  #  *  *  #  *   *  #  ****  #  *   *  #  *  *  #  *   *###  *  *++#  *   *  #     |
       |     *   *  #  *  *###  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #     |
 0.95x +-+...*...*..#..*..*.|#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#...+-+
       |     *   *  #  *  * |#  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #     |
       |     *   *  #  *  * |#  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #     |
  0.9x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+
       ASSIGNMENT  BITFIELD  FOURIER  FP EMULATION  HUFFMAN  IDEA  LU DECOMPOSITION  NEURAL NET  NUMERIC SORT  STRING SORT  hmean
  png: http://imgur.com/FfD27ey

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-12-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
b4aa297781 target/i386: optimize indirect branches
Speed up indirect branches by jumping to the target if it is valid.

Softmmu measurements (see later commit for user-mode numbers):

Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0.

-                  SPECint06 (test set), x86_64-softmmu (Ubuntu 16.04 guest). Host: Intel i7-4790K @ 4.00GHz

 2.4x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                                                                  |
      |   cross                                                                                                          |
 2.2x +cross+jr..........................................................................+++...........................+-+
      |                                                                                   |                              |
      |                                                                               +++ |                              |
   2x +-+..............................................................................|..|............................+-+
      |                                                                                |  |                              |
      |                                                                                |  |                              |
 1.8x +-+..............................................................................|####...........................+-+
      |                                                                                |# |#                             |
      |                                                                              **** |#                             |
 1.6x +-+............................................................................*.|*.|#...........................+-+
      |                                                                              * |* |#                             |
      |                                                                              * |* |#                             |
 1.4x +-+.......................................................................+++..*.|*.|#...........................+-+
      |                                                      ++++++             #### * |*++#             +++             |
      |                        +++                            |  |              #++# *++*  #          +++ |              |
 1.2x +-+......................###.....####....+++............|..|...........****..#.*..*..#....####...|.###.....####..+-+
      |        +++          **** #  ****  #    ####          ***###          *++*  # *  *  #    #++#  ****|#  +++#++#    |
      |    ****###     +++  *++* #  *++*  #  ++#  #    ####  *|* |#     +++  *  *  # *  *  #  ***  #  *| *|#  ****  #    |
   1x +-++-*++*++#++***###++*++*+#++*+-*++#+****++#++***++#+-*+*++#-+****##++*++*-+#+*++*-+#++*+*++#++*-+*+#++*++*++#-++-+
      |    *  *  #  * *  #  *  * #  *  *  # *  *  #  * *  #  *|* |#  *++* #  *  *  # *  *  #  * *  #  *  * #  *  *  #    |
      |    *  *  #  * *  #  *  * #  *  *  # *  *  #  * *  #  *+*++#  *  * #  *  *  # *  *  #  * *  #  *  * #  *  *  #    |
 0.8x +-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+
         astar   bzip2      gcc   gobmk h264ref   hmmlibquantum      mcf omnetpperlbench   sjengxalancbmk   hmean
  png: http://imgur.com/DU36YFU

NB. 'cross' represents the previous commit.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-11-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
fe62089563 target/i386: optimize cross-page direct jumps in softmmu
Instead of unconditionally exiting to the exec loop, use the
gen_jr helper to jump to the target if it is valid.

Perf impact: see next commit's log.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-10-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
1ebb1af1b8 target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr
This helper will be used by subsequent changes.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-9-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
8a6b28c7b5 target/arm: optimize indirect branches
Speed up indirect branches by jumping to the target if it is valid.

Softmmu measurements (see later commit for user-mode results):

Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0.

- Impact on Boot time

| setup  | ARM debian jessie boot+shutdown time | stddev |
|--------+--------------------------------------+--------|
| v2.9.0 |                                 8.84 |   0.07 |
| +cross |                                 8.85 |   0.03 |
| +jr    |                                 8.83 |   0.06 |

-                            NBench, arm-softmmu (debian jessie guest). Host: Intel i7-4790K @ 4.00GHz

  1.3x +-+-------------------------------------------------------------------------------------------------------------+-+
       |                                                                                                                 |
       |   cross                                                          ####                                           |
 1.25x +cross+jr..........................................................#++#.........................................+-+
       |                                                        ####      #  #                                           |
       |                                                     +++#  #      #  #                                           |
       |                                      +++            ****  #      #  #                                           |
  1.2x +-+...................................####............*..*..#......#..#.........................................+-+
       |                                  ****  #            *  *  #      #  #     ####                                  |
       |                                  *  *  #            *  *  #      #  #     #  #                                  |
 1.15x +-+................................*..*..#............*..*..#......#..#.....#..#................................+-+
       |                                  *  *  #            *  *  #      #  #     #  #                                  |
       |                                  *  *  #      ####  *  *  #      #  #     #  #                                  |
       |                                  *  *  #      #  #  *  *  #      #  #     #  #                         ####     |
  1.1x +-+................................*..*..#......#..#..*..*..#......#..#.....#..#.........................#..#...+-+
       |                                  *  *  #      #  #  *  *  #      #  #     #  #                         #  #     |
       |                                  *  *  #      #  #  *  *  #      #  #     #  #                         #  #     |
 1.05x +-+..........................####..*..*..#......#..#..*..*..#......#..#.....#..#......+++............*****..#...+-+
       |                        *****  #  *  *  #      #  #  *  *  #  *****  #     #  #   +++ |    ****###  *   *  #     |
       |                        *+++*  #  *  *  #      #  #  *  *  #  *+++*  #  ****  #  *****###  *  *  #  *   *  #     |
       |     *****###  +++####  *   *  #  *  *  #  *****  #  *  *  #  *   *  #  *  *  #  * | *++#  *  *  #  *   *  #     |
    1x +-++-+*+++*-+#++****++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-++-+
       |     *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #     |
       |     *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #  *   *  #     |
 0.95x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+
       ASSIGNMENT BITFIELD   FOURFP EMULATION   HUFFMAN   LU DECOMPOSITIONEURAL NNUMERIC SOSTRING SORT     hmean
  png: http://imgur.com/eOLmZNR

NB. 'cross' represents the previous commit.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-8-git-send-email-cota@braap.org>
[rth: Replace gen_jr global variable with DISAS_EXIT state.]
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
7ad55b4ffd target/arm: optimize cross-page direct jumps in softmmu
Instead of unconditionally exiting to the exec loop, use the
lookup_and_goto_ptr helper to jump to the target if it is valid.

Perf impact: see next commit's log.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-7-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
5cb4ef80f6 tcg/i386: implement goto_ptr
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-6-git-send-email-cota@braap.org>
[rth: Reuse goto_ptr epilogue for exit_tb 0.]
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Emilio G. Cota
cedbcb0152 tcg: Introduce goto_ptr opcode and tcg_gen_lookup_and_goto_ptr
Instead of exporting goto_ptr directly to TCG frontends, export
tcg_gen_lookup_and_goto_ptr(), which calls goto_ptr with the pointer
returned by the lookup_tb_ptr() helper. This is the only use case
we have for goto_ptr and lookup_tb_ptr, so having this function is
very convenient. Furthermore, it trivially allows us to avoid calling
the lookup helper if goto_ptr is not implemented by the backend.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1493263764-18657-2-git-send-email-cota@braap.org>
Message-Id: <1493263764-18657-3-git-send-email-cota@braap.org>
Message-Id: <1493263764-18657-4-git-send-email-cota@braap.org>
Message-Id: <1493263764-18657-5-git-send-email-cota@braap.org>
[rth: Squashed 4 related commits.]
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
374aae6534 qemu/atomic: Loosen restrictions for 64-bit ILP32 hosts
We need to coordinate with the TCG_OVERSIZED_GUEST test in cputlb.c,
and allow 64-bit atomics even though sizeof(void *) == 4.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
f1079bb8f9 tcg/sparc: Use the proper compilation flags for 32-bit
We have required a v9 cpu since 9b9c37c364.
However, the flags we were using did not reliably enable v8plus, which
meant that the compiler didn't know it could inline 64-bit atomics.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Richard Henderson
1639a965d3 target/nios2: Fix 64-bit ilp32 compilation
Avoid a "cast from pointer to integer of different size" warning
by using the proper host type.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-05 09:25:42 -07:00
Peter Maydell
199e19ee53 Merge remote-tracking branch 'remotes/mjt/tags/trivial-patches-fetch' into staging
trivial patches for 2017-06-05

# gpg: Signature made Mon 05 Jun 2017 15:23:46 BST
# gpg:                using RSA key 0x701B4F6B1A693E59
# gpg: Good signature from "Michael Tokarev <mjt@tls.msk.ru>"
# gpg:                 aka "Michael Tokarev <mjt@corpit.ru>"
# gpg:                 aka "Michael Tokarev <mjt@debian.org>"
# Primary key fingerprint: 6EE1 95D1 886E 8FFB 810D  4324 457C E0A0 8044 65C5
#      Subkey fingerprint: 7B73 BAD6 8BE7 A2C2 8931  4B22 701B 4F6B 1A69 3E59

* remotes/mjt/tags/trivial-patches-fetch: (21 commits)
  hw/core: nmi.c can be compiled as common-obj nowadays
  dump: fix memory_mapping_filter leak
  ide-test: check return of fwrite
  help: Add newline to end of thread option help text
  qemu-ga: remove useless allocation
  scsi/lsi53c895a: Remove unused lsi_mem_*() return value
  qapi: Fix some QMP documentation regressions
  hw/mips: add missing include
  register: display register prefix (name) since it is available
  hw/sparc: use ARRAY_SIZE() macro
  hw/xtensa: sim: use g_string/g_new
  target/arm: add data cache invalidation cp15 instruction to cortex-r5
  block: Correct documentation for BLOCK_WRITE_THRESHOLD
  trivial: Remove unneeded ifndef in memory.h
  altera_timer: fix incorrect memset
  configure: Detect native NetBSD curses(3)
  tests/libqtest: Print error instead of aborting when env variable is missing
  docs/qdev-device-use.txt: update section Default Devices
  docs qemu-doc: Avoid ide-drive, it's deprecated
  qemu-doc: Add hyperlinks to further license information
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-05 15:28:12 +01:00
Thomas Huth
03e947f9c2 hw/core: nmi.c can be compiled as common-obj nowadays
The target-specific code in nmi.c has been removed with this commit:

	commit f7e981f295
	nmi: remove x86 specific nmi handling

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-05 17:23:36 +03:00
Peter Maydell
cb8b8ef457 Merge remote-tracking branch 'remotes/elmarco/tags/chrfe-pull-request' into staging
# gpg: Signature made Fri 02 Jun 2017 20:12:48 BST
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/chrfe-pull-request:
  char: move char devices to chardev/
  char: make chr_fe_deinit() optionaly delete backend
  char: rename functions that are not part of fe
  char: move CharBackend handling in char-fe unit
  char: generalize qemu_chr_write_all()
  be-hci: use backend functions
  chardev: serial & parallel declaration to own headers
  chardev: move headers to include/chardev
  Remove/replace sysemu/char.h inclusion
  char-win: close file handle except with console
  char-win: rename hcom->file
  char-win: rename win_chr_init/poll win_chr_serial_init/poll
  char-win: remove WinChardev.len
  char-win: simplify win_chr_read()
  char: cast ARRAY_SIZE() as signed to silent warning on empty array

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-05 10:09:14 +01:00
Marc-André Lureau
22c3aea8db dump: fix memory_mapping_filter leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
John Snow
543f8f13e2 ide-test: check return of fwrite
To quiet patchew, add an assert for fwrite's return value.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Suraj Jitindar Singh
f603164ae4 help: Add newline to end of thread option help text
The help text for the thread sub option of the accel option is missing
a newline at the end. This is annoying as it makes it hard to see the
help text for the next option.

Add the new line so that the following option help text (-smp) is
displayed on a new line rather than on the same line and directly after
the thread help.

Before patch:

-accel [accel=]accelerator[,thread=single|multi]
                select accelerator (kvm, xen, hax or tcg; use 'help' for a list)
                thread=single|multi (enable multi-threaded TCG)-smp [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets]
                set the number of CPUs to 'n' [default=1]
                maxcpus= maximum number of total cpus, including
                offline CPUs for hotplug, etc
                cores= number of CPU cores on one socket
                threads= number of threads on one CPU core
                sockets= number of discrete sockets in the system

After patch:

-accel [accel=]accelerator[,thread=single|multi]
                select accelerator (kvm, xen, hax or tcg; use 'help' for a list)
                thread=single|multi (enable multi-threaded TCG)
-smp [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets]
                set the number of CPUs to 'n' [default=1]
                maxcpus= maximum number of total cpus, including
                offline CPUs for hotplug, etc
                cores= number of CPU cores on one socket
                threads= number of threads on one CPU core
                sockets= number of discrete sockets in the system

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Marc-André Lureau
7064024dee qemu-ga: remove useless allocation
There is no need to duplicate a fixed string.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Mao Zhongyi
3975bb56a7 scsi/lsi53c895a: Remove unused lsi_mem_*() return value
lsi_mem_read/write() always return 0, which their
callers do not actually care about. Change the function type
to void.

Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Eric Blake
244d04db58 qapi: Fix some QMP documentation regressions
In the process of getting rid of docs/qmp-commands.txt, we managed
to regress on some of the text that changed after the point where
the move was first branched and when the move actually occurred.
For example, commit 3282eca for blockdev-snapshot re-added the
extra "options" layer which had been cleaned up in commit 0153d2f.

This clears up all regressions identified over the range
02b351d..bd6092e:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05127.html
as well as a cleanup to x-blockdev-remove-medium to prefer
'id' over 'device' (matching the cleanup for 'eject').

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Philippe Mathieu-Daudé
2283adfb0a hw/mips: add missing include
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Acked-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Philippe Mathieu-Daudé
016b4a93e7 register: display register prefix (name) since it is available
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Philippe Mathieu-Daudé
1f6fb58d05 hw/sparc: use ARRAY_SIZE() macro
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Max Filippov
bb0b6f39c0 hw/xtensa: sim: use g_string/g_new
Replace malloc/free/sprintf with g_string/g_string_printf/g_string_free.
Replace g_malloc with g_new when allocating the MemoryRegion to get more
type safety.

Suggested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Luc MICHEL
95e9a242e2 target/arm: add data cache invalidation cp15 instruction to cortex-r5
The cp15, CRn=15, opc1=0, CRm=5, opc2=0 instruction invalidates all the
data cache on the cortex-r5. Implementing it as a NOP.

Signed-off-by: Luc MICHEL <luc.michel@git.antfield.fr>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Eric Blake
f85d66f47f block: Correct documentation for BLOCK_WRITE_THRESHOLD
Use the correct command name.

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Juan Quintela
e8758b6229 trivial: Remove unneeded ifndef in memory.h
The whole file is already surrounded by #ifndef CONFIG_USER_ONLY.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Paolo Bonzini
cc16ee9d4e altera_timer: fix incorrect memset
Use sizeof instead of ARRAY_SIZE, fixing -Wmemset-elt-size with recent
GCC versions.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Kamil Rytarowski
271f37abb5 configure: Detect native NetBSD curses(3)
NetBSD ships with traditional BSD curses with compatibility with ncurses.
qemu works nicely with the basesystem version of curses(3) from NetBSD.

The only mismatch between curses(3) and ncurses is the lack of
curses_version() in the NetBSD version. This function is used solely in
the configure script, therefore eliminate it from the curses(3) detection.

With this change applied, configure correctly detects the curses frontend.

Signed-off-by: Kamil Rytarowski <n54@gmx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Thomas Huth
7c933ad61b tests/libqtest: Print error instead of aborting when env variable is missing
When you currently try to run a test directly from the command line
without setting the QTEST_QEMU_BINARY environment variable first,
you are presented with an unhelpful assertion message like this:

 ERROR:tests/libqtest.c:163:qtest_init_without_qmp_handshake:
 assertion failed: (qemu_binary != NULL)
 Aborted (core dumped)

Let's replace the assert() with a more user friendly error message
instead.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Markus Armbruster
7a0bbd55e5 docs/qdev-device-use.txt: update section Default Devices
Resynchronize the table of default device suppressions with vl.c's
default_list[].

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Markus Armbruster
1c9f3b887b docs qemu-doc: Avoid ide-drive, it's deprecated
Suggested-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Thomas Huth
2f8d8f01c8 qemu-doc: Add hyperlinks to further license information
Add a link to the GPLv2 and a link to the LICENSE file in the
QEMU repository to fix the two TODO items in this appendix.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Thomas Huth
3f2ce724f1 qemu-doc: Move the qemu-ga description into a separate chapter
The qemu-ga description is currently a subsection of the Disk Images
chapter - which does not make much sense since the qemu-ga is not
directly related to disk images. So let's move this information
into a separate chapter instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-06-04 18:42:55 +03:00
Peter Maydell
c6e84fbd44 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, vhost: fixes, features

IOTLB support in vhost-user.
A bunch of fixes all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 02 Jun 2017 17:33:25 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  spec/vhost-user spec: Add IOMMU support
  vhost-user: add slave-req-fd support
  vhost-user: add vhost_user to hold the chr
  vhost: rework IOTLB messaging
  vhost: propagate errors in vhost_device_iotlb_miss()
  virtio-serial: fix segfault on disconnect
  virtio: add virtqueue_alloc_element tracepoint
  virtio-serial-bus: Unset hotplug handler when unrealize

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 17:46:22 +01:00
Maxime Coquelin
6dcdd06e3b spec/vhost-user spec: Add IOMMU support
This patch specifies and implements the master/slave communication
to support device IOTLB in slave.

The vhost_iotlb_msg structure introduced for kernel backends is
re-used, making the design close between the two backends.

An exception is the use of the secondary channel to enable the
slave to send IOTLB miss requests to the master.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-02 18:57:17 +03:00
Marc-André Lureau
4bbeeba023 vhost-user: add slave-req-fd support
Learn to give a socket to the slave to let him make requests to the
master.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-02 18:57:17 +03:00
Marc-André Lureau
2152f3fead vhost-user: add vhost_user to hold the chr
Next patches will add more fields to the structure

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-02 18:57:17 +03:00
Maxime Coquelin
020e571b8b vhost: rework IOTLB messaging
This patch reworks IOTLB messaging to prepare for vhost-user
device IOTLB support.

IOTLB messages handling is extracted from vhost-kernel backend,
so that only the message transport remains backend-specific.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-02 18:57:17 +03:00
Maxime Coquelin
fc58bd0d97 vhost: propagate errors in vhost_device_iotlb_miss()
Some backends might want to know when things went wrong.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-02 18:57:17 +03:00
Stefan Hajnoczi
46764fe09c virtio-serial: fix segfault on disconnect
Since commit d4c19cdeeb ("virtio-serial:
add missing virtio_detach_element() call") the following commands may
cause QEMU to segfault:

  $ qemu -M accel=kvm -cpu host -m 1G \
         -drive if=virtio,file=test.img,format=raw \
         -device virtio-serial-pci,id=virtio-serial0 \
         -chardev socket,id=channel1,path=/tmp/chardev.sock,server,nowait \
         -device virtserialport,chardev=channel1,bus=virtio-serial0.0,id=port1
  $ nc -U /tmp/chardev.sock
  ^C

  (guest)$ cat /dev/zero >/dev/vport0p1

The segfault is non-deterministic: if the event loop notices the socket
has been closed then there is no crash.  The disconnect has to happen
right before QEMU attempts to write data to the socket.

The backtrace is as follows:

  Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
  0x00005555557e0698 in do_flush_queued_data (port=0x5555582cedf0, vq=0x7fffcc854290, vdev=0x55555807b1d0) at hw/char/virtio-serial-bus.c:180
  180           for (i = port->iov_idx; i < port->elem->out_num; i++) {
  #1  0x000055555580d363 in virtio_queue_notify_vq (vq=0x7fffcc854290) at hw/virtio/virtio.c:1524
  #2  0x000055555580d363 in virtio_queue_host_notifier_read (n=0x7fffcc8542f8) at hw/virtio/virtio.c:2430
  #3  0x0000555555b3482c in aio_dispatch_handlers (ctx=ctx@entry=0x5555566b8c80) at util/aio-posix.c:399
  #4  0x0000555555b350d8 in aio_dispatch (ctx=0x5555566b8c80) at util/aio-posix.c:430
  #5  0x0000555555b3212e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:261
  #6  0x00007fffde71de52 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
  #7  0x0000555555b34353 in glib_pollfds_poll () at util/main-loop.c:213
  #8  0x0000555555b34353 in os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:261
  #9  0x0000555555b34353 in main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:517
  #10 0x0000555555773207 in main_loop () at vl.c:1917
  #11 0x0000555555773207 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4751

The do_flush_queued_data() function does not anticipate chardev close
events during vsc->have_data().  It expects port->elem to remain
non-NULL for the duration of its for loop.

The fix is simply to return from do_flush_queued_data() if the port
closes because the close event already frees port->elem and drains the
virtqueue - there is nothing left for do_flush_queued_data() to do.

Reported-by: Sitong Liu <siliu@redhat.com>
Reported-by: Min Deng <mdeng@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-02 18:57:17 +03:00
Paolo Bonzini
b0ac429f13 virtio: add virtqueue_alloc_element tracepoint
This tracepoint can help diagnose failures due to memory
fragmentation in the guest.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-02 18:57:17 +03:00
Ladi Prosek
f811f97040 virtio-serial-bus: Unset hotplug handler when unrealize
Virtio serial device controls the lifetime of virtio-serial-bus and
virtio-serial-bus links back to the device via its hotplug-handler
property. This extra ref-count prevents the device from getting
finalized, leaving the VirtIODevice memory listener registered and
leading to use-after-free later on.

This patch addresses the same issue as Fam Zheng's
"virtio-scsi: Unset hotplug handler when unrealize"
only for a different virtio device.

Cc: qemu-stable@nongnu.org
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
2017-06-02 18:57:16 +03:00
Peter Maydell
e32fb6da7e Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Fri 02 Jun 2017 16:32:39 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  gluster: add support for PREALLOC_MODE_FALLOC

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 16:51:17 +01:00
Niels de Vos
df3a429ae8 gluster: add support for PREALLOC_MODE_FALLOC
Add missing support for "preallocation=falloc" to the Gluster block
driver. This change bases its logic on that of block/file-posix.c and
removes the gluster_supports_zerofill() and qemu_gluster_zerofill()
functions in favour of #ifdef checks in an easy to read
switch-statement.

Both glfs_zerofill() and glfs_fallocate() have been introduced with
GlusterFS 3.5.0 (pkg-config glusterfs-api = 6). A #define for the
availability of glfs_fallocate() has been added to ./configure.

Reported-by: Satheesaran Sundaramoorthi <sasundar@redhat.com>
Signed-off-by: Niels de Vos <ndevos@redhat.com>
Message-id: 20170528063114.28691-1-ndevos@redhat.com
URL: https://bugzilla.redhat.com/1450759
Signed-off-by: Niels de Vos <ndevos@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-06-02 10:51:47 -04:00
Peter Maydell
1448228af3 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-sparc-signed' into staging
qemu-sparc update

# gpg: Signature made Fri 02 Jun 2017 06:09:17 BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-sparc-signed:
  hw/sparc64: QOM'ify sun4u.c
  hw/sparc: QOM'ify sun4m.c
  hw/timer: QOM'ify slavio_timer
  hw/timer: QOM'ify m48txx_sysbus
  hw/misc: QOM'ify slavio_misc.c
  hw/dma: QOM'ify sun4m_iommu.c
  hw/dma: QOM'ify sparc32_dma.c
  hw/misc: QOM'ify eccmemctl.c

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 15:19:23 +01:00
Peter Maydell
d47a851cae Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170601' into staging
migration/next for 20170601

# gpg: Signature made Thu 01 Jun 2017 17:51:04 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170601:
  migration: Move include/migration/block.h into migration/
  migration: Export ram.c functions in its own file
  migration: Create include for migration snapshots
  migration: Export rdma.c functions in its own file
  migration: Export tls.c functions in its own file
  migration: Export socket.c functions in its own file
  migration: Export fd.c functions in its own file
  migration: Export exec.c functions in its own file
  migration: Split qemu-file.h
  migration: Remove unneeded includes of migration/vmstate.h
  migration: shut src return path unconditionally
  migration: fix leak of src file on dst
  migration: Remove section_id parameter from vmstate_load
  migration: loadvm handlers are not used
  migration: Use savevm_handlers instead of loadvm copy

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 14:07:53 +01:00
Peter Maydell
7693cd7cb6 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170602' into staging
target-arm queue:
 * virt: numa: provide ACPI distance info when needed
 * aspeed: fix i2c controller bugs
 * M profile: support MPU
 * gicv3: fix mishandling of BPR1, VBPR1
 * load_uboot_image: don't assume a full header read
 * libvixl: Correct build failures on NetBSD

# gpg: Signature made Fri 02 Jun 2017 12:00:42 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170602: (25 commits)
  hw/arm/virt: fdt: generate distance-map when needed
  hw/arm/virt-acpi-build: build SLIT when needed
  aspeed: add some I2C devices to the Aspeed machines
  aspeed/i2c: introduce a state machine
  aspeed/i2c: handle LAST command under the RX command
  aspeed/i2c: improve command handling
  arm: Implement HFNMIENA support for M profile MPU
  arm: add MPU support to M profile CPUs
  armv7m: Classify faults as MemManage or BusFault
  arm: All M profile cores are PMSA
  armv7m: Implement M profile default memory map
  armv7m: Improve "-d mmu" tracing for PMSAv7 MPU
  arm: Remove unnecessary check on cpu->pmsav7_dregion
  arm: Don't let no-MPU PMSA cores write to SCTLR.M
  arm: Don't clear ARM_FEATURE_PMSA for no-mpu configs
  arm: Clean up handling of no-MPU PMSA CPUs
  arm: Use different ARMMMUIdx values for M profile
  arm: Add support for M profile CPUs having different MMU index semantics
  arm: Use the mmu_idx we're passed in arm_cpu_do_unaligned_access()
  target/arm: clear PMUVER field of AA64DFR0 when vPMU=off
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 13:05:06 +01:00
Andrew Jones
c7637c04be hw/arm/virt: fdt: generate distance-map when needed
This is based on patch Shannon Zhao originally posted.

Cc: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 20170529173751.3443-3-drjones@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:49 +01:00
Andrew Jones
94a66456f1 hw/arm/virt-acpi-build: build SLIT when needed
Cc: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 20170529173751.3443-2-drjones@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:49 +01:00
Cédric Le Goater
2cf6cb500c aspeed: add some I2C devices to the Aspeed machines
Let's add an RTC to the palmetto BMC and a LM75 temperature sensor to
the AST2500 EVB to start with.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1494827476-1487-5-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:49 +01:00
Cédric Le Goater
4960f084cf aspeed/i2c: introduce a state machine
The Aspeed I2C controller maintains a state machine in the command
register, which is mostly used for debug.

Let's start adding a few states to handle abnormal STOP
commands. Today, the model uses the busy status of the bus as a
condition to do so but it is not precise enough.

Also remove the ABNORMAL bit for failing TX commands. This is
incorrect with respect to the specs.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1494827476-1487-4-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:49 +01:00
Cédric Le Goater
d0efdc1686 aspeed/i2c: handle LAST command under the RX command
Today, the LAST command is handled with the STOP command but this is
incorrect. Also nack the I2C bus when a LAST is issued.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1494827476-1487-3-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:49 +01:00
Cédric Le Goater
ddabca757a aspeed/i2c: improve command handling
Multiple I2C commands can be fired simultaneously and the controller
executes the commands following these priorities:

  (1) Master Start Command
  (2) Master Transmit Command
  (3) Slave Transmit Command or Master Receive Command
  (4) Master Stop Command

The current code is incorrect with respect to the above sequence and
needs to be reworked to handle each individual command.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1494827476-1487-2-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:49 +01:00
Peter Maydell
3bef701256 arm: Implement HFNMIENA support for M profile MPU
Implement HFNMIENA support for the M profile MPU. This bit controls
whether the MPU is treated as enabled when executing at execution
priorities of less than zero (in NMI, HardFault or with the FAULTMASK
bit set).

Doing this requires us to use a different MMU index for "running
at execution priority < 0", because we will have different
access permissions for that case versus the normal case.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1493122030-32191-14-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:49 +01:00
Michael Davidsaver
29c483a506 arm: add MPU support to M profile CPUs
The M series MPU is almost the same as the already implemented R
profile MPU (v7 PMSA).  So all we need to implement here is the MPU
register interface in the system register space.

This implementation has the same restriction as the R profile MPU
that it doesn't permit regions to be sized down smaller than 1K.

We also do not yet implement support for MPU_CTRL.HFNMIENA; this
bit should if zero disable use of the MPU when running HardFault,
NMI or with FAULTMASK set to 1 (ie at an execution priority of
less than zero) -- if the MPU is enabled we don't treat these
cases any differently.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
Message-id: 1493122030-32191-13-git-send-email-peter.maydell@linaro.org
[PMM: Keep all the bits in mpu_ctrl field, rather than
 using SCTLR bits for them; drop broken HFNMIENA support;
 various cleanup]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:48 +01:00
Michael Davidsaver
5dd0641d23 armv7m: Classify faults as MemManage or BusFault
General logic is that operations stopped by the MPU are MemManage,
and those which go through the MPU and are caught by the unassigned
handle are BusFault. Distinguish these by looking at the
exception.fsr values, and set the CFSR bits and (if appropriate)
fill in the BFAR or MMFAR with the exception address.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
Message-id: 1493122030-32191-12-git-send-email-peter.maydell@linaro.org
[PMM: i-side faults do not set BFAR/MMFAR, only d-side;
 added some CPU_LOG_INT logging]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:51:48 +01:00
Peter Maydell
790a11503c arm: All M profile cores are PMSA
All M profile CPUs are PMSA, so set the feature bit.
(We haven't actually implemented the M profile MPU register
interface yet, but setting this feature bit gives us closer
to correct behaviour for the MPU-disabled case.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493122030-32191-11-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:48 +01:00
Michael Davidsaver
3a00d560bc armv7m: Implement M profile default memory map
Add support for the M profile default memory map which is used
if the MPU is not present or disabled.

The main differences in behaviour from implementing this
correctly are that we set the PAGE_EXEC attribute on
the right regions of memory, such that device regions
are not executable.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
Message-id: 1493122030-32191-10-git-send-email-peter.maydell@linaro.org
[PMM: rephrased comment and commit message; don't mark
 the flash memory region as not-writable; list all
 the cases in the default map explicitly rather than
 using a 'default' case for the non-executable regions]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:48 +01:00
Michael Davidsaver
c9f9f1246d armv7m: Improve "-d mmu" tracing for PMSAv7 MPU
Improve the "-d mmu" tracing for the PMSAv7 MPU translation
process as an aid in debugging guest MPU configurations:
 * fix a missing newline for a guest-error log
 * report the region number with guest-error or unimp
   logs of bad region register values
 * add a log message for the overall result of the lookup
 * print "0x" prefix for hex values

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493122030-32191-9-git-send-email-peter.maydell@linaro.org
[PMM: a little tidyup, report region number in all messages
 rather than just one]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:48 +01:00
Peter Maydell
e9235c6983 arm: Remove unnecessary check on cpu->pmsav7_dregion
Now that we enforce both:
 * pmsav7_dregion == 0 implies has_mpu == false
 * PMSA with has_mpu == false means SCTLR.M cannot be set
we can remove a check on pmsav7_dregion from get_phys_addr_pmsav7(),
because we can only reach this code path if the MPU is enabled
(and so region_translation_disabled() returned false).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493122030-32191-8-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:48 +01:00
Peter Maydell
06312febfb arm: Don't let no-MPU PMSA cores write to SCTLR.M
If the CPU is a PMSA config with no MPU implemented, then the
SCTLR.M bit should be RAZ/WI, so that the guest can never
turn on the non-existent MPU.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493122030-32191-7-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:48 +01:00
Peter Maydell
f50cd31413 arm: Don't clear ARM_FEATURE_PMSA for no-mpu configs
Fix the handling of QOM properties for PMSA CPUs with no MPU:

Allow no-MPU to be specified by either:
 * has-mpu = false
 * pmsav7_dregion = 0
and make setting one imply the other. Don't clear the PMSA
feature bit in this situation.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493122030-32191-6-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:47 +01:00
Peter Maydell
452a095526 arm: Clean up handling of no-MPU PMSA CPUs
ARM CPUs come in two flavours:
 * proper MMU ("VMSA")
 * only an MPU ("PMSA")
For PMSA, the MPU may be implemented, or not (in which case there
is default "always acts the same" behaviour, but it isn't guest
programmable).

QEMU is a bit confused about how we indicate this: we have an
ARM_FEATURE_MPU, but it's not clear whether this indicates
"PMSA, not VMSA" or "PMSA and MPU present", and sometimes we
use it for one purpose and sometimes the other.

Currently trying to implement a PMSA-without-MPU core won't
work correctly because we turn off the ARM_FEATURE_MPU bit
and then a lot of things which should still exist get
turned off too.

As the first step in cleaning this up, rename the feature
bit to ARM_FEATURE_PMSA, which indicates a PMSA CPU (with
or without MPU).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493122030-32191-5-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:47 +01:00
Peter Maydell
e7b921c2d9 arm: Use different ARMMMUIdx values for M profile
Make M profile use completely separate ARMMMUIdx values from
those that A profile CPUs use. This is a prelude to adding
support for the MPU and for v8M, which together will require
6 MMU indexes which don't map cleanly onto the A profile
uses:
 non secure User
 non secure Privileged
 non secure Privileged, execution priority < 0
 secure User
 secure Privileged
 secure Privileged, execution priority < 0

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1493122030-32191-4-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:47 +01:00
Peter Maydell
8bd5c82030 arm: Add support for M profile CPUs having different MMU index semantics
The M profile CPU's MPU has an awkward corner case which we
would like to implement with a different MMU index.

We can avoid having to bump the number of MMU modes ARM
uses, because some of our existing MMU indexes are only
used by non-M-profile CPUs, so we can borrow one.
To avoid that getting too confusing, clean up the code
to try to keep the two meanings of the index separate.

Instead of ARMMMUIdx enum values being identical to core QEMU
MMU index values, they are now the core index values with some
high bits set. Any particular CPU always uses the same high
bits (so eventually A profile cores and M profile cores will
use different bits). New functions arm_to_core_mmu_idx()
and core_to_arm_mmu_idx() convert between the two.

In general core index values are stored in 'int' types, and
ARM values are stored in ARMMMUIdx types.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1493122030-32191-3-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:47 +01:00
Peter Maydell
e517d95b63 arm: Use the mmu_idx we're passed in arm_cpu_do_unaligned_access()
When identifying the DFSR format for an alignment fault, use
the mmu index that we are passed, rather than calling cpu_mmu_index()
to get the mmu index for the current CPU state. This doesn't actually
make any difference since the only cases where the current MMU index
differs from the index used for the load are the "unprivileged
load/store" instructions, and in that case the mmu index may
differ but the translation regime is the same (apart from the
"use from Hyp mode" case which is UNPREDICTABLE).
However it's the more logical thing to do.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493122030-32191-2-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:47 +01:00
Wei Huang
2b3ffa9292 target/arm: clear PMUVER field of AA64DFR0 when vPMU=off
The PMUv3 driver of linux kernel (in arch/arm64/kernel/perf_event.c)
relies on the PMUVER field of id_aa64dfr0_el1 to decide if PMU support
is present or not. This patch clears the PMUVER field under TCG mode
when vPMU=off. Without it, PMUv3 will init inside guest VMs even
with vPMU=off. This patch also removes a redundant line inside the
if-statement.

Signed-off-by: Wei Huang <wei@redhat.com>
Message-id: 1495123889-32301-1-git-send-email-wei@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:47 +01:00
Peter Maydell
a89ff39ee9 hw/intc/arm_gicv3_cpuif: Fix priority masking for NS BPR1
When we calculate the mask to use to get the group priority from
an interrupt priority, the way that NS BPR1 is handled differs
from how BPR0 and S BPR1 work -- a BPR1 value of 1 means
the group priority is in bits [7:1], whereas for BPR0 and S BPR1
this is indicated by a 0 BPR value.

Subtract 1 from the BPR value before creating the mask if
we're using the NS BPR value, for both hardware and virtual
interrupts, as the GICv3 pseudocode does, and fix the comments
accordingly.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493226792-3237-4-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:47 +01:00
Peter Maydell
8193d4617c hw/intc/arm_gicv3_cpuif: Don't let BPR be set below its minimum
icc_bpr_write() was not enforcing that writing a value below the
minimum for the BPR should behave as if the BPR was set to the
minimum value. This doesn't make a difference for the secure
BPRs (since we define the minimum for the QEMU implementation
as zero) but did mean we were allowing the NS BPR1 to be set to
0 when 1 should be the lowest value.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493226792-3237-3-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:47 +01:00
Peter Maydell
f5dc1b7767 hw/intc/arm_gicv3_cpuif: Fix reset value for VMCR_EL2.VBPR1
We were setting the VBPR1 field of VMCR_EL2 to icv_min_vbpr()
on reset, but this is not correct. The field should reset to
the minimum value of ICV_BPR0_EL1 plus one.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1493226792-3237-2-git-send-email-peter.maydell@linaro.org
2017-06-02 11:51:46 +01:00
Andrew Jones
a18e93125d load_uboot_image: don't assume a full header read
Don't allow load_uboot_image() to proceed when fewer bytes than
header-size were read.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-id: 20170524091315.20284-1-drjones@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:46 +01:00
Kamil Rytarowski
993063fbb5 libvixl: Correct build failures on NetBSD
Ensure that C99 macros are defined regardless of the inclusion order of
headers in vixl. This is required at least on NetBSD.

The vixl/globals.h header defines __STDC_CONSTANT_MACROS and must be
included before other system headers.

This file defines unconditionally the following macros, without altering
the original sources:
 - __STDC_CONSTANT_MACROS
 - __STDC_LIMIT_MACROS
 - __STDC_FORMAT_MACROS

Signed-off-by: Kamil Rytarowski <n54@gmx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170514051820.15985-1-n54@gmx.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-02 11:51:46 +01:00
Marc-André Lureau
6b10e573d1 char: move char devices to chardev/
Suggested by Paolo Bonzini during series review.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:53 +04:00
Marc-André Lureau
1ce2610c10 char: make chr_fe_deinit() optionally delete backend
This simplifies removing a backend for a frontend user (no need to
retrieve the associated driver and separate delete call etc).

NB: many frontends have questionable handling of ending a chardev. They
should probably delete the backend to prevent broken reuse.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:53 +04:00
Marc-André Lureau
a9b1ca38c2 char: rename functions that are not part of fe
There is no clear reason to have those functions associated with
frontend.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:53 +04:00
Marc-André Lureau
4d43a603c7 char: move CharBackend handling in char-fe unit
Move all the frontend struct and methods to a separate unit. This avoids
accidentally mixing backend and frontend calls, and helps with readability.

Make qemu_chr_replay() a macro shared by both char and char-fe.

Export qemu_chr_write(), and use a macro for qemu_chr_write_all()

(nb: yes, CharBackend is for char frontend :)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:53 +04:00
Marc-André Lureau
c90e9392ef char: generalize qemu_chr_write_all()
qemu_chr_fe_write() is similar to qemu_chr_write_all(): the latter writes
all with a chardev backend.

Make qemu_chr_write() and qemu_chr_fe_write_buffer() take an 'all'
argument. If false, handle 'partial' write the way qemu_chr_fe_write()
used to, and call qemu_chr_write() from qemu_chr_fe_write().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:53 +04:00
Marc-André Lureau
93a78e4124 be-hci: use backend functions
Avoid accessing CharBackend directly, use qemu_chr_be_* methods instead.

be->chr_read should exist if qemu_chr_be_can_write() is true.

(use qemu_chr_be_write(), _impl() bypasses replay)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Andrzej Zaborowski <balrogg@gmail.com>
2017-06-02 11:33:53 +04:00
Marc-André Lureau
7566c6efe7 chardev: serial & parallel declaration to own headers
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Marc-André Lureau
8228e353d8 chardev: move headers to include/chardev
So they are all in one place. The following patch will move serial &
parallel declarations to the respective headers.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Marc-André Lureau
f664b88247 Remove/replace sysemu/char.h inclusion
Those are apparently unnecessary includes.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Marc-André Lureau
541815ff7f char-win: close file handle except with console
Only the console handle shouldn't be closed, however, the "file" handle
should.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Marc-André Lureau
ef0f272f38 char-win: rename hcom->file
hcom is the name of the file handle, regardless of the actual chardev
driver (serial, file, console etc..). Rename it to be more explicit.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Marc-André Lureau
221e659c3f char-win: rename win_chr_init/poll win_chr_serial_init/poll
Those 2 functions are specific to serial chardev, make it more clear.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Marc-André Lureau
6ce8e0eb58 char-win: remove WinChardev.len
The "len" argument can be passed directly to win_chr_read()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Marc-André Lureau
b88ee02594 char-win: simplify win_chr_read()
win_chr_read_poll() is always used before win_chr_read().
We can easily fold win_chr_readfile() too.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-06-02 11:33:52 +04:00
Philippe Mathieu-Daudé
c7e47c63e0 char: cast ARRAY_SIZE() as signed to silence warning on empty array
chardev/char.c: In function 'chardev_name_foreach':
chardev/char.c:546:19: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits]
     for (i = 0; i < ARRAY_SIZE(chardev_alias_table); i++) {
                   ^
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170530120919.8874-1-f4bug@amsat.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-06-02 11:33:35 +04:00
xiaoqiang zhao
78fb261db1 hw/sparc64: QOM'ify sun4u.c
Drop the old SysBusDeviceClass::init and use instance_init
or DeviceClass::realize instead

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
xiaoqiang zhao
dc8b6dd984 hw/sparc: QOM'ify sun4m.c
Drop the old SysBusDeviceClass::init and use instance_init
or DeviceClass::realize instead

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
xiaoqiang zhao
4410b94cce hw/timer: QOM'ify slavio_timer
rename slavio_timer_init1 to slavio_timer_init and assign
it to slavio_timer_info.instance_init, then we drop the
SysBusDeviceClass::init

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
xiaoqiang zhao
c04e34a982 hw/timer: QOM'ify m48txx_sysbus
* split the old SysBus init function into an instance_init
  and a Device realize function
* use DeviceClass::realize instead of SysBusDeviceClass::init
* assign DeviceClass::vmsd instead of using vmstate_register function

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
xiaoqiang zhao
46eedc0e69 hw/misc: QOM'ify slavio_misc.c
Drop the old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
xiaoqiang zhao
1c958ad300 hw/dma: QOM'ify sun4m_iommu.c
Drop the old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
xiaoqiang zhao
8c612079e0 hw/dma: QOM'ify sparc32_dma.c
Drop the old SysBus init function and use instance_init
and a realize function

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
xiaoqiang zhao
b229a5765b hw/misc: QOM'ify eccmemctl.c
* Split the old SysBus init into an instance_init and a
  DeviceClass::realize function
* Drop the old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-06-02 05:54:43 +01:00
Juan Quintela
2c9e6fec89 migration: Move include/migration/block.h into migration/
All functions were internal, except blk_mig_init() that is exported in
misc.h now.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:24 +02:00
Juan Quintela
7b1e1a2202 migration: Export ram.c functions in its own file
All functions are internal except for ram_mig_init().  Create
migration/misc.h for this kind of functions.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:23 +02:00
Juan Quintela
5e22479ae2 migration: Create include for migration snapshots
Start removing migration code from sysemu/sysemu.h.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:23 +02:00
Juan Quintela
e1a3ecee3b migration: Export rdma.c functions in its own file
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:23 +02:00
Juan Quintela
41d64227ed migration: Export tls.c functions in its own file
Just for the functions exported from tls.c.  Notice that we can't
remove the migration/migration.h include from tls.c because it directly
accesses MigrationState for the tls params.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:23 +02:00
Juan Quintela
61e8b14880 migration: Export socket.c functions in its own file
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:23 +02:00
Juan Quintela
7fcac4a2cc migration: Export fd.c functions in its own file
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:22 +02:00
Juan Quintela
f4dbe1bf34 migration: Export exec.c functions in its own file
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:22 +02:00
Juan Quintela
08a0aee15c migration: Split qemu-file.h
Split the file into public and internal interfaces.  I have to rename
the external one because we can't have two include files with the same
name in the same directory.  Build system gets confused.  The only
exported functions are the ones that handle basic types.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:22 +02:00
Juan Quintela
107da9acb5 migration: Remove unneeded includes of migration/vmstate.h
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:49:22 +02:00
Peter Xu
660819b1df migration: shut src return path unconditionally
We used to do the shutdown only for postcopy. Now we do this as long as
the source return path is there.

Moving the cleanup of from_src_file there too.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-01 18:49:12 +02:00
Peter Xu
3482655bbc migration: fix leak of src file on dst
The return path channel is possibly leaked. Fix it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-06-01 18:48:58 +02:00
Juan Quintela
3a011c26bc migration: Remove section_id parameter from vmstate_load
Everything else assumes that we always load a device from its own
savevm handler.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:31:13 +02:00
Juan Quintela
c2355ad47d migration: loadvm handlers are not used
So we remove all traces of them.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-06-01 18:31:13 +02:00
Juan Quintela
0f42f65781 migration: Use savevm_handlers instead of loadvm copy
There is no reason for having the loadvm_handlers at all.  There is
only one use, and we can use the savevm handlers.

We will remove the loadvm handlers on a following patch.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

--

- Added load_version_id: version_id read from the stream (laurent)
- Added load_section_id: section_id read from the stream (dave)
2017-06-01 18:31:13 +02:00
Peter Maydell
43771d5d92 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-05-31' into staging
QAPI patches for 2017-05-31

# gpg: Signature made Wed 31 May 2017 18:06:39 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-05-31:
  qapi: Reject alternates that can't work with keyval_parse()
  tests/qapi-schema: Avoid 'str' in alternate test cases
  qapi: Document visit_type_any() issues with keyval input
  qobject-input-visitor: Reject non-finite numbers with keyval

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-01 16:39:16 +01:00
Peter Maydell
c077a998eb Merge remote-tracking branch 'remotes/riku/tags/pull-linux-user-20170531' into staging
Misc linux-user updates

# gpg: Signature made Wed 31 May 2017 12:33:17 BST
# gpg:                using RSA key 0xB44890DEDE3C9BC0
# gpg: Good signature from "Riku Voipio <riku.voipio@iki.fi>"
# gpg:                 aka "Riku Voipio <riku.voipio@linaro.org>"
# Primary key fingerprint: FF82 03C8 C391 98AE 0581  41EF B448 90DE DE3C 9BC0

* remotes/riku/tags/pull-linux-user-20170531:
  linux-user: add strace support for uinfo structure of rt_sigqueueinfo() and rt_tgsigqueueinfo()
  linux-user: fix inconsistent spaces in print_siginfo() output
  linux-user: add rt_tgsigqueueinfo() strace
  linux-user: add support for rt_tgsigqueueinfo() system call
  linux-user: fix argument type declaration of rt_sigqueinfo() syscall
  linux-user: fix mismatch of lock/unlock_user() invocations in rt_sigqueinfo() syscall
  linux-user: fix ssetmask() system call
  linux-user: add tkill(), tgkill() and rt_sigqueueinfo() strace
  linux-user: add strace for getuid(), gettid(), getppid(), geteuid()
  linux-user: remove all traces of qemu from /proc/self/cmdline
  linux-user: allocate heap memory for execve arguments
  linux-user: fix inotify
  linux-user: fix fadvise64_64() on ppc
  linux-user: fix eventfd
  linux-user: call fd_trans_target_to_host_data() for write()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-01 15:50:40 +01:00
Peter Maydell
e5cac10a3b Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170531' into staging
migration/next for 20170531

# gpg: Signature made Wed 31 May 2017 08:53:06 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170531:
  migration: use dirty_rate_high_cnt more aggressively
  migration: set bytes_xfer_* outside of autoconverge logic
  migration: set dirty_pages_rate before autoconverge logic
  migration: keep bytes_xfer_prev init'd to zero
  migration: Create savevm.h for functions exported from savevm.c

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-01 15:01:59 +01:00
Peter Maydell
61462af65a Merge remote-tracking branch 'remotes/aurel/tags/pull-target-sh4-20170530' into staging
Queued target/sh4 patches

# gpg: Signature made Tue 30 May 2017 20:12:10 BST
# gpg:                using RSA key 0xBA9C78061DDD8C9B
# gpg: Good signature from "Aurelien Jarno <aurelien@aurel32.net>"
# gpg:                 aka "Aurelien Jarno <aurelien@jarno.fr>"
# gpg:                 aka "Aurelien Jarno <aurel32@debian.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 7746 2642 A9EF 94FD 0F77  196D BA9C 7806 1DDD 8C9B

* remotes/aurel/tags/pull-target-sh4-20170530:
  target/sh4: fix RTE instruction delay slot
  target/sh4: ignore interrupts in a delay slot
  target/sh4: introduce DELAY_SLOT_MASK
  target/sh4: fix reset when using a kernel and an initrd
  target/sh4: log unauthorized accesses using qemu_log_mask

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-01 13:12:20 +01:00
Peter Maydell
066ae4f829 Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging
Various bugfixes and code cleanups. Most notably, it fixes metadata handling in
mapped-file security mode (especially for the virtfs root).

# gpg: Signature made Tue 30 May 2017 14:36:22 BST
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/for-upstream:
  9pfs: local: metadata file for the VirtFS root
  9pfs: local: simplify file opening
  9pfs: local: resolve special directories in paths
  9pfs: check return value of v9fs_co_name_to_path()
  util: drop old utimensat() compat code
  9pfs: assume utimensat() and futimens() are present
  fsdev: fix virtfs-proxy-helper cwd
  9pfs: local: fix unlink of alien files in mapped-file mode
  9pfs: drop pdu_push_and_notify()
  fsdev: don't allow unknown format in marshal/unmarshal
  virtio-9p/xen-9p: move 9p specific bits to core 9p code

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-01 12:06:58 +01:00
Peter Maydell
70f31414e7 Merge remote-tracking branch 'remotes/ehabkost/tags/numa-pull-request' into staging
NUMA fixes, 2017-05-30

# gpg: Signature made Tue 30 May 2017 20:10:44 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/numa-pull-request:
  numa: Fix format string for "Invalid node" message
  numa-test: fix query-cpus leaks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-06-01 10:58:47 +01:00
Markus Armbruster
c0644771eb qapi: Reject alternates that can't work with keyval_parse()
Alternates are sum types like unions, but use the JSON type on the
wire / QType in QObject instead of an explicit tag.  That's why we
require alternate members to have distinct QTypes.

The recently introduced keyval_parse() (commit d454dbe) can only
produce string scalars.  The qobject_input_visitor_new_keyval() input
visitor mostly hides the difference, so code using a QObject input
visitor doesn't have to care whether its input was parsed from JSON or
KEY=VALUE,...  The difference leaks for alternates, as noted in commit
0ee9ae7: a non-string, non-enum scalar alternate value can't currently
be expressed.

In part, this is just our insufficiently sophisticated implementation.
Consider alternate type 'GuestFileWhence'.  It has an integer member
and a 'QGASeek' member.  The latter is an enumeration with values
'set', 'cur', 'end'.  The meaning of b=set, b=cur, b=end, b=0, b=1 and
so forth is perfectly obvious.  However, our current implementation
falls apart at run time for b=0, b=1, and so forth.  Fixable, but not
today; add a test case and a TODO comment.

Now consider an alternate type with a string and an integer member.
What's the meaning of a=42?  Is it the string "42" or the integer 42?
Whichever meaning you pick makes the other inexpressible.  This isn't
just an implementation problem, it's fundamental.  Our current
implementation will pick string.

So far, we haven't needed such alternates.  To make sure we stop and
think before we add one that cannot sanely work with keyval_parse(),
let's require alternate members to have sufficiently distinct
representation in KEY=VALUE,... syntax:

* A string member clashes with any other scalar member

* An enumeration member clashes with bool members when it has value
  'on' or 'off'.

* An enumeration member clashes with numeric members when it has a
  value that starts with '-', '+', or a decimal digit.  This is a
  rather lazy approximation of the actual number syntax accepted by
  the visitor.

  Note that enumeration values starting with '-' and '+' are rejected
  elsewhere already, but better safe than sorry.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1495471335-23707-5-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-05-31 16:04:09 +02:00
Markus Armbruster
8168ca8ea3 tests/qapi-schema: Avoid 'str' in alternate test cases
The next commit is going to make alternate members of type 'str'
conflict with other scalar types.  Would break a few test cases that
don't actually require 'str'.  Flip them from 'str' to 'bool' or
'EnumOne'.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1495471335-23707-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-05-31 16:04:05 +02:00
Markus Armbruster
8339fa266c qapi: Document visit_type_any() issues with keyval input
It's already documented in keyval.c (commit 0ee9ae7), but visitor.h
can use a note, too.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1495471335-23707-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-05-31 16:04:05 +02:00
Markus Armbruster
5891c388bb qobject-input-visitor: Reject non-finite numbers with keyval
The QObject input visitor can produce only finite numbers when its
input comes out of the JSON parser, because the JSON parser
implements RFC 7159, which provides no syntax for infinity and NaN.

However, it can produce infinity and NaN when its input comes out of
keyval_parse(), because we parse with strtod() then.

The keyval variant should not be able to express things the JSON
variant can't.  Rejecting non-finite numbers there is the conservative
fix.  It's also minimally invasive.

We could instead extend our JSON dialect to provide for infinity and
NaN.  Not today.

Note that the JSON formatter can emit non-finite numbers (marked FIXME
in commit 6e8e5cb).

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1495471335-23707-2-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-05-31 16:04:05 +02:00
Felipe Franciosi
b4a3c64b16 migration: use dirty_rate_high_cnt more aggressively
The commit message from 070afca25 suggests that dirty_rate_high_cnt
should be used more aggressively to start throttling after two
iterations instead of four. The code, however, only changes the auto
convergence behaviour to throttle after three iterations. This makes the
behaviour more aggressive by kicking off throttling after two iterations
as originally intended.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-05-31 09:39:20 +02:00
Felipe Franciosi
d2a4d85a8a migration: set bytes_xfer_* outside of autoconverge logic
The bytes_xfer_now/prev counters are only used by the auto convergence
logic. However, they are used alongside the dirty_pages_rate counter,
which is calculated (and required) outside of this logic. The problem
with this approach is that if the auto convergence capability is changed
while a migration is ongoing, the relationship of the counters will be
broken.

This moves the management of bytes_xfer_now/prev counters outside of the
auto convergence logic to address this issue.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-05-31 09:39:20 +02:00
Felipe Franciosi
d693c6f10f migration: set dirty_pages_rate before autoconverge logic
Currently, a "period" in the RAM migration logic is at least a second
long and accounts for what happened since the last period (or the
beginning of the migration). The dirty_pages_rate counter is calculated
at the end of this logic.

If the auto convergence capability is enabled from the start of the
migration, it won't be able to use this counter the first time around.
This calculates dirty_pages_rate as soon as a period is deemed over,
which allows for it to be used immediately.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-05-31 09:39:20 +02:00
Felipe Franciosi
9884db2814 migration: keep bytes_xfer_prev init'd to zero
The first time migration_bitmap_sync() is called, bytes_xfer_prev is set
to ram_state.bytes_transferred which is, at this point, zero. The next
time migration_bitmap_sync() is called, an iteration has happened and
bytes_xfer_prev is set to 'x' bytes. Most likely, more than one second
has passed, so the auto converge logic will be triggered and
bytes_xfer_now will also be set to 'x' bytes.

This condition is currently masked by dirty_rate_high_cnt, which will
wait for a few iterations before throttling. It would otherwise always
assume zero bytes have been copied and therefore throttle the guest
(possibly) prematurely.

Given bytes_xfer_prev is only used by the auto convergence logic, it
makes sense to only set its value after a check has been made against
bytes_xfer_now.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-05-31 09:39:20 +02:00
Juan Quintela
20a519a05a migration: Create savevm.h for functions exported from savevm.c
This removes last trace of migration functions from sysemu/sysemu.h.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-05-31 09:39:19 +02:00
Eduardo Habkost
f892291eee numa: Fix format string for "Invalid node" message
Some compilers complain about the PRIu16 format string with the
MAX(src, dst) and MAX_NODES arguments.  Example output from Apple LLVM
version 7.3.0 (clang-703.0.31):

  numa.c:236:20: warning: format specifies type 'unsigned short' but the argument has type 'int' [-Wformat]
                     MAX(src, dst), MAX_NODES);
  ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
  include/qapi/error.h:163:35: note: expanded from macro 'error_setg'
                          (fmt), ## __VA_ARGS__)
                                    ^~~~~~~~~~~
  glib/2.52.2/include/glib-2.0/glib/gmacros.h:288:20: note: expanded from macro 'MAX'
  #define MAX(a, b)  (((a) > (b)) ? (a) : (b))
                     ^~~~~~~~~~~~~~~~~~~~~~~~~
  numa.c:236:35: warning: format specifies type 'unsigned short' but the argument has type 'int' [-Wformat]
                     MAX(src, dst), MAX_NODES);
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~
  include/qapi/error.h:163:35: note: expanded from macro 'error_setg'
                          (fmt), ## __VA_ARGS__)
                                    ^~~~~~~~~~~
  include/sysemu/sysemu.h:165:19: note: expanded from macro 'MAX_NODES'
  #define MAX_NODES 128
                    ^~~
MAX(src, dst) promotes the src and dst arguments to int, and MAX_NODES
is an int.  Use %d to silence those warnings.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170530184013.31044-1-ehabkost@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-30 16:09:58 -03:00
Marc-André Lureau
5e39d89d20 numa-test: fix query-cpus leaks
Fix test leaks introduced in commit 2941020a47.

(and small extra space removed)

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170526110456.32004-1-marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-30 16:09:48 -03:00
Aurelien Jarno
be53081a61 target/sh4: fix RTE instruction delay slot
The ReTurn from Exception (RTE) instruction loads the system register
(SR) with the saved system register (SSR). It has a delay slot, and
behaves specially according to the SH4 manual:

  The SR value accessed by the instruction in the RTE delay slot is the
  value restored from SSR by the RTE instruction. The SR and MD values
  defined prior to RTE execution are used to fetch the instruction in
  the RTE delay slot.

The instruction in the delay slot being often a NOP, it doesn't cause
any issue most of the time except in some rare cases where the NOP is
being split into a different TB (for example when the TCG op buffer
is full). In that case the NOP is fetched with the user permissions
and causes an instruction TLB protection violation exception.

This patch fixes that by introducing a new delay slot flag for the
RTE instruction. Given it's a privileged instruction, the RTE delay
slot instruction is always fetched in privileged mode. It is therefore
enough to check for this flag in cpu_mmu_index.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-05-30 21:00:56 +02:00
Aurelien Jarno
5c6f3eb7db target/sh4: ignore interrupts in a delay slot
Delay slots are indivisible, therefore avoid scheduling an interrupt in
the delay slot. However exceptions are possible.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-05-30 21:00:56 +02:00
Aurelien Jarno
9a562ae7ba target/sh4: introduce DELAY_SLOT_MASK
This will make the introduction of a new flag in the next
patches easier.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-05-30 21:00:56 +02:00
Aurelien Jarno
73479c5c87 target/sh4: fix reset when using a kernel and an initrd
When a masked exception happens, the SH4 CPU generates a non-masked
reset exception, which then jumps to the reset vector at address
0xA0000000. While this is emulated correctly in QEMU, this does not
work when using a kernel and initrd as this address then contains an
illegal instruction (and there is no guarantee the kernel and initrd
haven't been overwritten).

Therefore call qemu_system_reset_request to reload the kernel and initrd
and load the program counter to the kernel entry point.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-05-30 21:00:56 +02:00
Aurelien Jarno
324189babb target/sh4: log unauthorized accesses using qemu_log_mask
qemu_log_mask() is preferred over fprintf() for logging errors.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2017-05-30 18:28:00 +02:00
Stefan Hajnoczi
0748b3526e Merge remote-tracking branch 'kwolf/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Mon 29 May 2017 03:34:59 PM BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* kwolf/tags/for-upstream:
  block/file-*: *_parse_filename() and colons
  block: Fix backing paths for filenames with colons
  block: Tweak error message related to qemu-img amend
  qemu-img: Fix leakage of options on error
  qemu-img: copy *key-secret opts when opening newly created files
  qemu-img: introduce --target-image-opts for 'convert' command
  qemu-img: fix --image-opts usage with dd command
  qemu-img: add support for --object with 'dd' command
  qemu-img: Fix documentation of convert
  qcow2: remove extra local_error variable
  mirror: Drop permissions on s->target on completion
  nvme: Add support for Controller Memory Buffers
  iotests: 147: Don't test inet6 if not available
  qemu-iotests: Test streaming with missing job ID
  stream: fix crash in stream_start() when block_job_create() fails

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 14:15:15 +01:00
Stefan Hajnoczi
697e42dec8 Merge remote-tracking branch 'kraxel/tags/pull-usb-20170529-1' into staging
usb: deprecate legacy options and hmp commands
usb: fixes for ehci and hub, split xhci variants

# gpg: Signature made Mon 29 May 2017 02:07:17 PM BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* kraxel/tags/pull-usb-20170529-1:
  ehci: fix frame timer invocation.
  usb: don't wakeup during coldplug
  usb-hub: set PORT_STAT_C_SUSPEND on host-initiated wake-up
  xhci: add CONFIG_USB_XHCI_NEC option
  xhci: split into multiple files
  usb: Simplify the parameter parsing of the legacy usb serial device
  usb: Deprecate HMP commands usb_add and usb_del
  usb: Deprecate the legacy -usbdevice option
  ehci: fix overflow in frame timer code

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 14:15:10 +01:00
Stefan Hajnoczi
a3203e7dd3 Merge remote-tracking branch 'mst/tags/for_upstream' into staging
pci, virtio, vhost: fixes

A bunch of fixes all over the place. Most notably this fixes
the new MTU feature when using vhost.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Mon 29 May 2017 01:10:24 AM BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* mst/tags/for_upstream:
  acpi-test: update expected files
  pc: ACPI BIOS: use highest NUMA node for hotplug mem hole SRAT entry
  vhost-user: pass message as a pointer to process_message_reply()
  virtio_net: Bypass backends for MTU feature negotiation
  intel_iommu: turn off pt before 2.9
  intel_iommu: support passthrough (PT)
  intel_iommu: allow dev-iotlb context entry conditionally
  intel_iommu: use IOMMU_ACCESS_FLAG()
  intel_iommu: provide vtd_ce_get_type()
  intel_iommu: renaming context entry helpers
  x86-iommu: use DeviceClass properties
  memory: remove the last param in memory_region_iommu_replay()
  memory: tune last param of iommu_ops.translate()

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 14:15:04 +01:00
Stefan Hajnoczi
08f44282c1 Merge remote-tracking branch 'sthibault/tags/samuel-thibault' into staging
slirp updates

# gpg: Signature made Sat 27 May 2017 10:36:33 PM BST
# gpg:                using RSA key 0xB0A51BF58C9179C5
# gpg: Good signature from "Samuel Thibault <samuel.thibault@aquilenet.fr>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@u-bordeaux.fr>"
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: AEBF 7448 FAB9 453A 4552  390E B0A5 1BF5 8C91 79C5

* sthibault/tags/samuel-thibault:
  Fix total IP header length in forwarded TCP packets
  slirp: fix leak
  slirp: Fix wrong mss bug.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 14:14:57 +01:00
Stefan Hajnoczi
7b6badb6a9 Merge remote-tracking branch 'jtc/tags/block-pull-request' into staging
# gpg: Signature made Fri 26 May 2017 08:22:27 PM BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* jtc/tags/block-pull-request:
  block/gluster: glfs_lseek() workaround
  blockjob: use deferred_to_main_loop to indicate the coroutine has ended
  blockjob: reorganize block_job_completed_txn_abort
  blockjob: strengthen a bit test-blockjob-txn
  blockjob: group BlockJob transaction functions together
  blockjob: introduce block_job_cancel_async, check iostatus invariants
  blockjob: move iostatus reset inside block_job_user_resume
  blockjob: separate monitor and blockjob APIs
  blockjob: introduce block_job_pause/resume_all
  blockjob: introduce block_job_early_fail
  blockjob: remove iostatus_reset callback
  blockjob: remove unnecessary check

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 14:14:48 +01:00
Stefan Hajnoczi
5bb0d22cb4 Merge remote-tracking branch 'dgibson/tags/ppc-for-2.10-20170525' into staging
ppc patch queue 2017-05-25

Assorted accumulated patches.  These are nearly all bugfixes at one
level or another - some for longstanding problems, others for some
regressions caused by more recent cleanups.

This includes preliminary patches towards fixing migration for Radix
Page Table guests under POWER9 and also fixing some migration
regressions due to the re-organization of the interrupt controller
code.  Not all the pieces are there yet, so those still won't quite
work, but the preliminary changes make sense on their own.

# gpg: Signature made Thu 25 May 2017 04:50:00 AM BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* dgibson/tags/ppc-for-2.10-20170525:
  xics: add unrealize handler
  hw/ppc/spapr.c: recover pending LMB unplug info in spapr_lmb_release
  hw/ppc: migrating the DRC state of hotplugged devices
  hw/ppc: removing drc->detach_cb and drc->detach_cb_opaque
  hw/ppc/spapr.c: adding pending_dimm_unplugs to sPAPRMachineState
  spapr: add pre_plug function for memory
  pseries: Restore support for total vcpus not a multiple of threads-per-core for old machine types
  pseries: Split CAS PVR negotiation out into a separate function
  spapr: fix error reporting in xics_system_init()
  spapr_cpu_core: drop reference on ICP object during CPU realization
  hw/ppc/spapr_events.c: removing 'exception' from sPAPREventLogEntry
  spapr: ensure core_slot isn't NULL in spapr_core_unplug()
  xics_kvm: cache already enabled vCPU ids
  spapr: Consolidate HPT freeing code into a routine
  spapr-cpu-core: release ICP object when realization fails
  spapr: sanitize error handling in spapr_ics_create()
  ppc/xics: simplify prototype of xics_spapr_init()
  target/ppc: reset reservation in do_rfi()

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 09:44:58 +01:00
Stefan Hajnoczi
d0eda02938 Merge remote-tracking branch 'armbru/tags/pull-qapi-2017-05-23' into staging
QAPI patches for 2017-05-23

# gpg: Signature made Tue 23 May 2017 12:33:32 PM BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* armbru/tags/pull-qapi-2017-05-23:
  qapi-schema: Remove obsolete note from ObjectTypeInfo
  block: Use QDict helpers for --force-share
  shutdown: Expose bool cause in SHUTDOWN and RESET events
  shutdown: Add source information to SHUTDOWN and RESET
  shutdown: Preserve shutdown cause through replay
  shutdown: Prepare for use of an enum in reset/shutdown_request
  shutdown: Simplify shutdown_signal
  sockets: Plug memory leak in socket_address_flatten()
  scripts/qmp/qom-set: fix the value argument passed to srv.command()

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 09:33:40 +01:00
Stefan Hajnoczi
62e570b1c5 Merge remote-tracking branch 'ehabkost/tags/numa-pull-request' into staging
Silence "make check" warnings on NUMA test

# gpg: Signature made Tue 23 May 2017 11:44:24 AM BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* ehabkost/tags/numa-pull-request:
  numa: Silence incomplete mapping warning under qtest

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-30 09:31:09 +01:00
Kevin Wolf
42a4812841 Merge remote-tracking branch 'mreitz/tags/pull-block-2017-05-29-v3' into queue-block
Block patches for the block queue

# gpg: Signature made Mon May 29 16:32:16 2017 CEST
# gpg:                using RSA key 0xF407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* mreitz/tags/pull-block-2017-05-29-v3:
  block/file-*: *_parse_filename() and colons
  block: Fix backing paths for filenames with colons
  block: Tweak error message related to qemu-img amend
  qemu-img: Fix leakage of options on error
  qemu-img: copy *key-secret opts when opening newly created files
  qemu-img: introduce --target-image-opts for 'convert' command
  qemu-img: fix --image-opts usage with dd command
  qemu-img: add support for --object with 'dd' command
  qemu-img: Fix documentation of convert
  qcow2: remove extra local_error variable

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-05-29 16:34:27 +02:00
Max Reitz
03c320d803 block/file-*: *_parse_filename() and colons
The file drivers' *_parse_filename() implementations just strip the
optional protocol prefix off the filename. However, for e.g.
"file:foo:bar", this would lead to "foo:bar" being stored as the BDS's
filename which looks like it should be managed using the "foo" protocol.
This is especially troublesome if you then try to resolve a backing
filename based on "foo:bar".

This issue can only occur if the stripped part is a relative filename
("file:/foo:bar" will be shortened to "/foo:bar" and having a slash
before the first colon means that "/foo" is not recognized as a protocol
part). Therefore, we can easily fix it by prepending "./" to such
filenames.

Before this patch:
$ ./qemu-img create -f qcow2 backing.qcow2 64M
Formatting 'backing.qcow2', fmt=qcow2 size=67108864 encryption=off
    cluster_size=65536 lazy_refcounts=off refcount_bits=16
$ ./qemu-img create -f qcow2 -b backing.qcow2 file:top:image.qcow2
Formatting 'file:top:image.qcow2', fmt=qcow2 size=67108864
    backing_file=backing.qcow2 encryption=off cluster_size=65536
    lazy_refcounts=off refcount_bits=16
$ ./qemu-io file:top:image.qcow2
can't open device file:top:image.qcow2: Could not open backing file:
    Unknown protocol 'top'

After this patch:
$ ./qemu-io file:top:image.qcow2
[no error]

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170522195217.12991-3-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:54 +02:00
Max Reitz
0d54a6fed3 block: Fix backing paths for filenames with colons
path_combine() naturally tries to preserve a protocol prefix. However,
it recognizes such a prefix by scanning for the first colon; which is
different from what path_has_protocol() does: There only is a protocol
prefix if there is a colon before the first slash.

A protocol prefix that is not recognized by path_has_protocol() is none,
and should thus not be taken as one.

Case in point, before this patch:
$ ./qemu-img create -f qcow2 -b backing.qcow2 ./top:image.qcow2
qemu-img: ./top:image.qcow2: Could not open './top:backing.qcow2':
    No such file or directory

Afterwards:
$ ./qemu-img create -f qcow2 -b backing.qcow2 ./top:image.qcow2
qemu-img: ./top:image.qcow2: Could not open './backing.qcow2':
    No such file or directory

Reported-by: yangyang <yangyang@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170522195217.12991-2-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:54 +02:00
Eric Blake
bcb07dba92 block: Tweak error message related to qemu-img amend
When converting a 1.1 image down to 0.10, qemu-iotests 060 forces
a contrived failure where allocating a cluster used to replace a
zero cluster reads unaligned data.  Since it is a zero cluster
rather than a data cluster being converted, changing the error
message to match our earlier change in 'qcow2: Make distinction
between zero cluster types obvious' is worthwhile.

Suggested-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 20170508171302.17805-1-eblake@redhat.com
[mreitz: Commit message fixes]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:54 +02:00
Fam Zheng
adb998c12a qemu-img: Fix leakage of options on error
Reported by Coverity.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170515141014.25793-1-famz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:54 +02:00
Daniel P. Berrange
29cf933635 qemu-img: copy *key-secret opts when opening newly created files
The qemu-img dd/convert commands will create an image file and
then try to open it. Historically it has been possible to open
new files without passing any options. With encrypted files
though, the *key-secret options are mandatory, so we need to
provide those options when opening the newly created file.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170515164712.6643-5-berrange@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:54 +02:00
Daniel P. Berrange
305b4c60f2 qemu-img: introduce --target-image-opts for 'convert' command
The '--image-opts' flag indicates whether the source filename
includes options. The target filename has to remain in the
plain filename format though, since it needs to be passed to
bdrv_create().  When using --skip-create though, it would be
possible to use image-opts syntax. This adds --target-image-opts
to indicate that the target filename includes options. Currently
this mandates use of the --skip-create flag too.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170515164712.6643-4-berrange@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:54 +02:00
Daniel P. Berrange
ea204ddac7 qemu-img: fix --image-opts usage with dd command
The --image-opts flag can only be used to affect the parsing
of the source image. The target image has to be specified in
the traditional style regardless, since it needs to be passed
to the bdrv_create() API which does not support the new style
opts.

Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170515164712.6643-3-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:53 +02:00
Daniel P. Berrange
83d4bf943e qemu-img: add support for --object with 'dd' command
The qemu-img dd command added --image-opts support, but missed
the corresponding --object support. This prevented passing
secrets (eg auth passwords) needed by certain disk images.

Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170515164712.6643-2-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:53 +02:00
Fam Zheng
caa31bf28c qemu-img: Fix documentation of convert
It got lost in commit a8d16f9ca "qemu-img: Update documentation for -U".

Reported-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170515103551.31313-1-famz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:53 +02:00
Alberto Garcia
a7a6a2bffc qcow2: remove extra local_error variable
Commit d7086422b1 added a local_err
variable global to the qcow2_amend_options() function, so there's no
need to have this other one.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170511150337.21470-1-berto@igalia.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:39:53 +02:00
Kevin Wolf
63c8ef2890 mirror: Drop permissions on s->target on completion
This fixes an assertion failure that was triggered by qemu-iotests 129
on some CI host, while the same test case didn't seem to fail on other
hosts.

Essentially the problem is that the blk_unref(s->target) in
mirror_exit() doesn't necessarily mean that the BlockBackend goes away
immediately. It is possible that the job completion was triggered nested
in mirror_drain(), which looks like this:

    BlockBackend *target = s->target;
    blk_ref(target);
    blk_drain(target);
    blk_unref(target);

In this case, the write permissions for s->target are retained until
after blk_drain(), which makes removing mirror_top_bs fail for the
active commit case (can't have a writable backing file in the chain
without the filter driver).

Explicitly dropping the permissions first means that the additional
reference doesn't hurt and the job can complete successfully even if
called from the nested blk_drain().

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-05-29 15:37:26 +02:00
Gerd Hoffmann
3bfecee2cb ehci: fix frame timer invocation.
ehci registers ehci_frame_timer as both timer and bottom half, which
turned out to be a bad idea as it can be called as bottom half then
while it is running as timer, and it isn't prepared to handle recursive
calls.

Change the timer func to just schedule the bottom half to avoid this.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1449609
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170519120428.25981-1-kraxel@redhat.com
2017-05-29 14:19:16 +02:00
Gerd Hoffmann
26022652c6 usb: don't wakeup during coldplug
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1452512
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170523084635.20062-1-kraxel@redhat.com
2017-05-29 14:18:09 +02:00
Ladi Prosek
6361bbc7e2 usb-hub: set PORT_STAT_C_SUSPEND on host-initiated wake-up
PORT_STAT_C_SUSPEND should be set even on host-initiated wake-up,
i.e. on ClearPortFeature(PORT_SUSPEND). Windows is known to not
work properly otherwise.

Side note, since PORT_ENABLE looks similar and might appear to
have the same issue: According to 11.24.2.7.2.2 C_PORT_ENABLE:

  "This bit is set when the PORT_ENABLE bit changes from one to
  zero as a result of a Port Error condition (see Section 11.8.1).
  This bit is not set on any other changes to PORT_ENABLE."

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Message-id: 20170522123325.2199-1-lprosek@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-05-29 14:17:59 +02:00
Gerd Hoffmann
2da077a881 xhci: add CONFIG_USB_XHCI_NEC option
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1451189
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170517103313.8459-2-kraxel@redhat.com
2017-05-29 14:03:36 +02:00
Gerd Hoffmann
0bbb2f3df1 xhci: split into multiple files
Moved structs and defines to hcd-xhci.h.
Move nec controller variant to hcd-xhci-nec.c.
No functional changes.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170517103313.8459-1-kraxel@redhat.com
2017-05-29 14:03:35 +02:00
Thomas Huth
e14935df26 usb: Simplify the parameter parsing of the legacy usb serial device
Coverity complains about the current code, so let's get rid of
the now unneeded while loop and simply always emit "unrecognized
serial USB option" for all unsupported options.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1495177204-16808-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-05-29 14:03:35 +02:00
Thomas Huth
b813bed1ab usb: Deprecate HMP commands usb_add and usb_del
The commands 'device_add' and 'device_del' should be used
nowadays instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: 1495175803-12830-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-05-29 14:03:35 +02:00
Thomas Huth
a358a3af45 usb: Deprecate the legacy -usbdevice option
The '-usbdevice' option is considered as deprecated nowadays and
we might want to remove these options in a future version of QEMU.
So mark these options as deprecated in the documentation and print out
a warning if it is used to tell the user what to use instead.
While we're at it, improve also some other minor USB-related spots
in qemu-options.hx that were not up to date anymore.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1495175716-12735-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-05-29 14:03:35 +02:00
Gerd Hoffmann
3ae7eb88c4 ehci: fix overflow in frame timer code
In case the frame timer doesn't run for a while due to the host being
busy skipped_uframes can become big enough that UFRAME_TIMER_NS *
skipped_uframes overflows.  Which in turn throws off all subsequent
ehci frame timer calculations.

Reported-by: 李林 <8610_28@163.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170515104543.32044-1-kraxel@redhat.com
2017-05-29 14:03:35 +02:00
Miloš Stojanović
ba9fcea1cb linux-user: add strace support for uinfo structure of rt_sigqueueinfo() and rt_tgsigqueueinfo()
This commit adds support for printing the content of the target_siginfo_t
structure in a similar way to how it is printed by the host strace. The
pointer to this structure is sent as the last argument of the
rt_sigqueueinfo() and rt_tgsigqueueinfo() system calls.
For this purpose, print_siginfo() is used and the get_target_siginfo()
function is implemented in order to get the information obtained from
the pointer into the form that print_siginfo() expects.

The get_target_siginfo() function is based on
host_to_target_siginfo_noswap() in linux-user mode, but here both
arguments are pointers to target_siginfo_t, so instead of converting
the information to siginfo_t it just extracts and copies it to a
target_siginfo_t structure.

Prior to this commit, typical strace output used to look like this:
8307 rt_sigqueueinfo(8307,50,0x00000040007ff6b0) = 0

After this commit, it looks like this:
8307 rt_sigqueueinfo(8307,50,{si_signo=50, si_code=SI_QUEUE, si_pid=8307,
si_uid=1000, si_sigval=17716762128}) = 0

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:09 +03:00
Miloš Stojanović
f196c3700d linux-user: fix inconsistent spaces in print_siginfo() output
This patch improves the consistency of the output from print_siginfo()
by removing spaces around the equal sign of si_pid, si_uid, si_timer1,
si_timer2, si_band, si_fd, si_addr, si_status and si_sigval. This way
they match si_signo and si_code. Host strace was used as a reference
for this change.

Prior to this commit, typical strace output used to look like this:

Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Miloš Stojanović
243e0fe550 linux-user: add rt_tgsigqueueinfo() strace
This commit improves strace support for syscall rt_tgsigqueueinfo().

Prior to this commit, typical strace output used to look like this:
7775 rt_tgsigqueueinfo(7775,7775,50,1996483164,0,0) = 0

After this commit, it looks like this:
7775 rt_tgsigqueueinfo(7775,7775,50,0x76ffea5c) = 0

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Miloš Stojanović
cf8b8bfc50 linux-user: add support for rt_tgsigqueueinfo() system call
Add a new system call: rt_tgsigqueueinfo().

This system call is similar to rt_sigqueueinfo(), but instead of
sending the signal and data to the whole thread group with the ID
equal to the argument tgid, it sends it to a single thread within
that thread group. The ID of the thread is specified by the tid
argument.

The implementation is based on the rt_sigqueueinfo() in linux-user
mode, where the tid is added as the second argument and the
previous second and third argument become arguments three and four,
respectively.

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>

Conflicts:
	linux-user/syscall.c
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Miloš Stojanović
c1a402a7ae linux-user: fix argument type declaration of rt_sigqueinfo() syscall
Change the type of the first argument of rt_sigqueueinfo() from int to pid_t
in the syscall declaration to match specifications of the system call.

Proper spacing is added to satisfy checkpatch.pl.

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Miloš Stojanović
d8b6d892c6 linux-user: fix mismatch of lock/unlock_user() invocations in rt_sigqueinfo() syscall
Change the unlock_user() argument from arg1 to arg3 to match with
lock_user(), since arg3 contains the pointer to the siginfo_t structure.

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Miloš Stojanović
a8617d8c2f linux-user: fix ssetmask() system call
Fix the ssetmask() system call by removing the invocation of sigorset().

The ssetmask() system call should replace the old signal mask
with the new and return the old mask. It shouldn't combine
the old and the new mask with sigorset(). Fetching the old
mask for sigorset() is also no longer needed.

The problem was detected after running LTP test group syscalls
for the MIPS EL 32 R2 architecture where the test ssetmask01 failed
with exit code 1. The test passes now that the ssetmask() system call
is fixed.

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Miloš Stojanović
5162264e43 linux-user: add tkill(), tgkill() and rt_sigqueueinfo() strace
Improve strace support for syscall tkill(), tgkill() and rt_sigqueueinfo()
by implementing print functions that match arguments types of the system
calls and add them to the corresponding strace.list entry.

tkill:
Prior to this commit, typical strace output used to look like this:
4886 tkill(4886,50,0,4832615904,0,-9151031864016699136) = 0
After this commit, it looks like this:
4886 tkill(4886,50) = 0

tgkill:
Prior to this commit, typical strace output used to look like this:
4890 tgkill(4890,4890,50,8,4832630528,4832615904) = 0
After this commit, it looks like this:
4890 tgkill(4890,4890,50) = 0

rt_sigqueueinfo:
Prior to this commit, typical strace output used to look like this:
8307 rt_sigqueueinfo(8307,50,1996483164,0,0,50) = 0
After this commit, it looks like this:
8307 rt_sigqueueinfo(8307,50,0x00000040007ff6b0) = 0

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Miloš Stojanović
65424cc456 linux-user: add strace for getuid(), gettid(), getppid(), geteuid()
Improve strace support for syscalls getuid(), gettid(), getppid()
and geteuid(). Since these system calls don't have arguments, "%s()"
is added in the corresponding strace.list entry so that no arguments
are printed.

getuid:
Prior to this commit, typical strace output used to look like this:
4894 getuid(4894,0,0,274886293296,-3689348814741910323,4832615904) = 1000
After this commit, it looks like this:
4894 getuid() = 1000

gettid:
Prior to this commit, typical strace output used to look like this:
8307 gettid(0,0,64,0,4832630528,4832615840) = 8307
After this commit, it looks like this:
8307 gettid() = 8307

getppid:
Prior to this commit, typical strace output used to look like this:
20588 getppid(20588,64,0,4832630528,4832615888,0) = 20625
After this commit, it looks like this:
20588 getppid() = 20625

geteuid:
Prior to this commit, typical strace output used to look like this:
20588 geteuid(64,0,0,4832615888,0,-9151031864016699136) = 1000
After this commit, it looks like this:
20588 geteuid() = 1000

Signed-off-by: Miloš Stojanović <Milos.Stojanovic@rt-rk.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
58de8b9684 linux-user: remove all traces of qemu from /proc/self/cmdline
Instead of post-processing the real contents use the remembered target
argv.  That removes all traces of qemu, including command line options,
and handles QEMU_ARGV0.

Signed-off-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Prasad J Pandit
b936cb50aa linux-user: allocate heap memory for execve arguments
Arguments passed to execve(2) call from user program could
be large, allocating stack memory for them via alloca(3) call
would lead to bad behaviour. Use 'g_new0' to allocate memory
for such arguments.

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:08 +03:00
Laurent Vivier
c4e316cfb5 linux-user: fix inotify
When a fd is opened using inotify_init(), a read provides
one or more inotify_event structures:

    struct inotify_event {
        int      wd;
        uint32_t mask;
        uint32_t cookie;
        uint32_t len;
        char     name[];
    };

The integer fields must be byte-swapped to the target endianness.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:07 +03:00
Laurent Vivier
43046b5a07 linux-user: fix fadvise64_64() on ppc
On ppc, advice is arg2, not arg6:

long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
                      u32 len_high, u32 len_low)

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:07 +03:00
Laurent Vivier
562a20b4ef linux-user: fix eventfd
When a fd is opened using eventfd(), a read provides
a 64bit counter in the host byte order, and a
write increases the internal counter by the provided
64bit value.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:07 +03:00
Laurent Vivier
04b9bcf911 linux-user: call fd_trans_target_to_host_data() for write()
As for sendmsg() or sendto(), we must call the target to
host data translator if it is defined. This is needed for
eventfd(): the write() syscall allows to add a value to
the internal counter, and so, it must be byte-swapped to
the host order.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-05-29 14:56:07 +03:00
Michael S. Tsirkin
811bf15114 acpi-test: update expected files
commit 1a8d61ddbf ("pc: ACPI BIOS: use highest NUMA node for hotplug mem
hole SRAT entry") changed generated SRAT tables, update expected files
accordingly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-05-29 03:07:57 +03:00
Ladi Prosek
ede24a0264 pc: ACPI BIOS: use highest NUMA node for hotplug mem hole SRAT entry
For reasons unknown, Windows won't online all memory, both at command
line and hot-plugged later, unless the hotplug mem hole SRAT entry
specifies a node greater than or equal to the ones where memory is
added.

Using the highest node on the machine makes recent versions of Windows
happy.

With this example command line:
  ... \
  -m 1024,slots=4,maxmem=32G \
  -numa node,nodeid=0 \
  -numa node,nodeid=1 \
  -numa node,nodeid=2 \
  -numa node,nodeid=3 \
  -object memory-backend-ram,size=1G,id=mem-mem1 \
  -device pc-dimm,id=dimm-mem1,memdev=mem-mem1,node=1

Windows reports a total of 1G of RAM without this commit and the expected
2G with this commit.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2017-05-29 03:07:57 +03:00
Sjors Gielen
2e30230aa9 Fix total IP header length in forwarded TCP packets
When forwarding TCP packets, the internal tcpiphdr struct length was wrongly
used inside the IP header. This commit changes the behaviour to what is used
by tcp_output.c, using the correct full IP header + payload length.

Signed-off-by: Sjors Gielen <sjors@sjorsgielen.nl>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-05-27 23:35:00 +02:00
Marc-André Lureau
7d8246960e slirp: fix leak
Spotted by ASAN:

/x86_64/hmp/pc-0.12:
=================================================================
==22538==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 224 byte(s) in 1 object(s) allocated from:
    #0 0x7f0f63cdee60 in malloc (/lib64/libasan.so.3+0xc6e60)
    #1 0x556f11ff32d7 in tcp_newtcpcb /home/elmarco/src/qemu/slirp/tcp_subr.c:250
    #2 0x556f11fdb1d1 in tcp_listen /home/elmarco/src/qemu/slirp/socket.c:688
    #3 0x556f11fca9d5 in slirp_add_hostfwd /home/elmarco/src/qemu/slirp/slirp.c:1052
    #4 0x556f11f8db41 in slirp_hostfwd /home/elmarco/src/qemu/net/slirp.c:506
    #5 0x556f11f8dd83 in hmp_hostfwd_add /home/elmarco/src/qemu/net/slirp.c:535

There might be a better way to fix this, but calling slirp tcp_close()
doesn't work.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-05-27 23:34:47 +02:00
Tao Wu
c7990a2648 slirp: Fix wrong mss bug.
This bug was introduced by https://github.com/qemu/qemu/commit/98c6305

Signed-off-by: Tao Wu <lepton@google.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-05-27 23:34:47 +02:00
Stephen Bates
a896f7f26a nvme: Add support for Controller Memory Buffers
Implement NVMe Controller Memory Buffers (CMBs) which were added in
version 1.2 of the NVMe Specification. This patch adds an optional
argument (cmb_size_mb) which indicates the size of the CMB (in
MB). Currently only the Submission Queue Support (SQS) is enabled
which aligns with the current Linux driver for NVMe.

Signed-off-by: Stephen Bates <sbates@raithlin.com>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-05-26 16:48:21 +02:00
Fam Zheng
cf1cd117e2 iotests: 147: Don't test inet6 if not available
This is the case in our docker tests, as we use --net=none there. Skip
this method.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-05-26 16:48:21 +02:00
Kevin Wolf
0bb0aea4ba qemu-iotests: Test streaming with missing job ID
This adds a small test for the image streaming error path for failing
block_job_create(), which would have found the null pointer dereference
in commit a170a91f.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
2017-05-26 16:48:21 +02:00
Alberto Garcia
525989a50a stream: fix crash in stream_start() when block_job_create() fails
The code that tries to reopen a BlockDriverState in stream_start()
when the creation of a new block job fails crashes because it attempts
to dereference a pointer that is known to be NULL.

This is a regression introduced in a170a91fd3,
likely because the code was copied from stream_complete().

Cc: qemu-stable@nongnu.org
Reported-by: Kashyap Chamarthy <kchamart@redhat.com>
Signed-off-by: Alberto Garcia <berto@igalia.com>
Tested-by: Kashyap Chamarthy <kchamart@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-05-26 16:48:21 +02:00
Paolo Bonzini
8f7168b343 io: simplify qio_channel_attach_aio_context
If properly preceded by qio_channel_detach_aio_context, this function really
has nothing to do except setting ioc->ctx.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-05-26 10:38:08 +01:00
Maxime Coquelin
3cf7daf8c3 vhost-user: pass message as a pointer to process_message_reply()
process_message_reply() was recently updated to get full message
content instead of only its request field.

There is no need to copy all the struct content into the stack,
so just pass its pointer as const.

Reviewed-by: Jens Freimann <jfreiman@redhat.com>
Reviewed-by: Zhiyong Yang <zhiyong.yang@intel.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-05-25 21:25:28 +03:00
Maxime Coquelin
75ebec11af virtio_net: Bypass backends for MTU feature negotiation
This patch adds a new internal "x-mtu-bypass-backend" property
to bypass backends for MTU feature negotiation.

When this property is set, the MTU feature is negotiated as soon
as supported by the guest and a MTU value is set via the host_mtu
parameter. In case the backend advertises the feature (e.g. DPDK's
vhost-user backend), the feature negotiation is propagated down to
the backend.

When this property is not set, the backend has to support the MTU
feature for its negotiation to succeed.

For compatibility purpose, this property is disabled for machine
types v2.9 and older.

Cc: Aaron Conole <aconole@redhat.com>
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-05-25 21:25:28 +03:00
Peter Xu
c10595fb34 intel_iommu: turn off pt before 2.9
This is for compatibility.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:28 +03:00
Peter Xu
dbaabb25f4 intel_iommu: support passthrough (PT)
Hardware support for VT-d device passthrough. Although current Linux can
live with iommu=pt even without this, this is faster than when using
software passthrough.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Liu, Yi L <yi.l.liu@linux.intel.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Peter Xu
f80c98740e intel_iommu: allow dev-iotlb context entry conditionally
When device-iotlb is not specified, we should fail this check. A new
function vtd_ce_type_check() is introduced.

While I'm at it, clean up the vtd_dev_to_context_entry() a bit - replace
many "else if" usages with direct if checks. That'll make the logic
clearer.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Peter Xu
5a38cb5940 intel_iommu: use IOMMU_ACCESS_FLAG()
We have that now, so why not use it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Peter Xu
127ff5c356 intel_iommu: provide vtd_ce_get_type()
Helper to fetch VT-d context entry type.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Peter Xu
8f7d7161dd intel_iommu: renaming context entry helpers
The old names are too long and less ordered. Let's start to use
vtd_ce_*() as a pattern.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Peter Xu
0b77d30a43 x86-iommu: use DeviceClass properties
No reason to keep tens of lines if we can do it actually far shorter.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Peter Xu
ad523590f6 memory: remove the last param in memory_region_iommu_replay()
We were always passing in that one as "false" to assume that's a read
operation, and we also assume that IOMMU translation would always have
that read permission. A better permission would be IOMMU_NONE since the
replay is after all not a real read operation, but just a page table
rebuilding process.

CC: David Gibson <david@gibson.dropbear.id.au>
CC: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Peter Xu
bf55b7afce memory: tune last param of iommu_ops.translate()
This patch converts the old "is_write" bool into IOMMUAccessFlags. The
difference is that "is_write" can only express either read/write, but
sometimes what we really want is "none" here (neither read nor write).
Replay is a good example - during replay, we should not check any RW
permission bits since that's not an actual IO at all.

CC: Paolo Bonzini <pbonzini@redhat.com>
CC: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
2017-05-25 21:25:27 +03:00
Greg Kurz
81ffbf5ab1 9pfs: local: metadata file for the VirtFS root
When using the mapped-file security, credentials are stored in a metadata
directory located in the parent directory. This is okay for all paths with
the notable exception of the root path, since we don't want and probably
can't create a metadata directory above the virtfs directory on the host.

This patch introduces a dedicated metadata file, sitting in the virtfs root
for this purpose. It relies on the fact that the "." name necessarily refers
to the virtfs root.

As for the metadata directory, we don't want the client to see this file.
The current code only cares for readdir() but there are many other places
to fix actually. The filtering logic is hence put in a separate function.

Before:

# ls -ld
drwxr-xr-x. 3 greg greg 4096 May  5 12:49 .
# chown root.root .
chown: changing ownership of '.': Is a directory
# ls -ld
drwxr-xr-x. 3 greg greg 4096 May  5 12:49 .

After:

# ls -ld
drwxr-xr-x. 3 greg greg 4096 May  5 12:49 .
# chown root.root .
# ls -ld
drwxr-xr-x. 3 root root 4096 May  5 12:50 .

and from the host:

ls -al .virtfs_metadata_root
-rwx------. 1 greg greg 26 May  5 12:50 .virtfs_metadata_root
$ cat .virtfs_metadata_root
virtfs.uid=0
virtfs.gid=0

Reported-by: Leo Gaspard <leo@gaspard.io>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Leo Gaspard <leo@gaspard.io>
[groug: work around a patchew false positive in
        local_set_mapped_file_attrat()]
2017-05-25 10:30:14 +02:00
Greg Kurz
3dbcf27334 9pfs: local: simplify file opening
The logic to open a path currently sits between local_open_nofollow() and
the relative_openat_nofollow() helper, which has no other user.

For the sake of clarity, this patch moves all the code of the helper into
its unique caller. While here we also:
- drop the code to skip leading "/" because the backend isn't supposed to
  pass anything but relative paths without consecutive slashes. The assert()
  is kept because we really don't want a buggy backend to pass an absolute
  path to openat().
- use strchrnul() to get simpler code. This is ok since virtfs is for
  linux+glibc hosts only.
- don't dup() the initial directory and add an assert() to ensure we don't
  return the global mountfd to the caller. BTW, this would mean that the
  caller passed an empty path, which isn't supposed to happen either.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
[groug: fixed typos in changelog]
2017-05-25 10:30:14 +02:00
Greg Kurz
f57f587857 9pfs: local: resolve special directories in paths
When using the mapped-file security mode, the creds of a path /foo/bar
are stored in the /foo/.virtfs_metadata/bar file. This is okay for all
paths unless they end with '.' or '..', because we cannot create the
corresponding file in the metadata directory.

This patch ensures that '.' and '..' are resolved in all paths.

The core code only passes path elements (no '/') to the backend, with
the notable exception of the '/' path, which refers to the virtfs root.
This patch preserves the current behavior of converting it to '.' so
that it can be passed to "*at()" syscalls ('/' would mean the host root).

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-05-25 10:30:14 +02:00
Greg Kurz
4fa62005d0 9pfs: check return value of v9fs_co_name_to_path()
These v9fs_co_name_to_path() call sites have always been around. I guess
no care was taken to check the return value because the name_to_path
operation could never fail at the time. This is no longer true: the
handle and synth backends can already fail this operation, and so will the
local backend soon.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-05-25 10:30:14 +02:00
Greg Kurz
fcdcf1eed2 util: drop old utimensat() compat code
Now that 9pfs and virtfs-proxy-helper have been converted to utimensat(),
we don't need to keep qemu_utimens() anymore.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-05-25 10:30:14 +02:00
Greg Kurz
24df3371d9 9pfs: assume utimensat() and futimens() are present
The utimensat() and futimens() syscalls have been around for ages (ie,
glibc 2.6 and linux 2.6.22), and the decision was already taken to
switch to utimensat() anyway when fixing CVE-2016-9602 in 2.9.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-05-25 10:30:14 +02:00
Greg Kurz
4be56c1959 fsdev: fix virtfs-proxy-helper cwd
Since chroot() doesn't change the current directory, it is indeed a good
practice to chdir() to the target directory and then chroot(), or
to chroot() to the target directory and then chdir("/").

The current code does neither of them actually. Let's go for the latter.

This doesn't fix any security issue since all of this takes place before
the helper begins to process requests.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-05-25 10:30:13 +02:00
Greg Kurz
6a87e7929f 9pfs: local: fix unlink of alien files in mapped-file mode
When trying to remove a file from a directory, both created in non-mapped
mode, the file remains and EBADF is returned to the guest.

This is a regression introduced by commit "df4938a6651b 9pfs: local:
unlinkat: don't follow symlinks" when fixing CVE-2016-9602. It changed the
way we unlink the metadata file from

    ret = remove("$dir/.virtfs_metadata/$name");
    if (ret < 0 && errno != ENOENT) {
         /* Error out */
    }
    /* Ignore absence of metadata */

to

    fd = openat("$dir/.virtfs_metadata")
    unlinkat(fd, "$name")
    if (ret < 0 && errno != ENOENT) {
         /* Error out */
    }
    /* Ignore absence of metadata */

If $dir was created in non-mapped mode, openat() fails with ENOENT and
we pass -1 to unlinkat(), which fails in turn with EBADF.

We just need to check the return of openat() and ignore ENOENT, in order
to restore the behaviour we had with remove().

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
[groug: rewrote the comments as suggested by Eric]
2017-05-25 10:30:13 +02:00
Greg Kurz
a17d8659c4 9pfs: drop pdu_push_and_notify()
Only pdu_complete() needs to notify the client that a request has completed.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-05-25 10:30:13 +02:00
Greg Kurz
57a0aa6b50 fsdev: don't allow unknown format in marshal/unmarshal
The code only uses well known format strings. An unknown format token is a
bug.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-05-25 10:30:13 +02:00
Greg Kurz
506f327582 virtio-9p/xen-9p: move 9p specific bits to core 9p code
These bits aren't related to the transport so let's move them to the core
code.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-05-25 10:30:13 +02:00
Greg Kurz
62f94fc94f xics: add unrealize handler
Now that ICPState objects get finalized on CPU unplug, we should unregister
reset handlers as well to avoid a QEMU crash at machine reset time.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-25 11:31:33 +10:00
Daniel Henrique Barboza
16ee99805e hw/ppc/spapr.c: recover pending LMB unplug info in spapr_lmb_release
When a LMB hot unplug starts, the current DRC LMB status is stored at
spapr->pending_dimm_unplugs QTAILQ. This queue isn't migrated, thus
if a migration occurs in the middle of a LMB unplug the
spapr_lmb_release callback will lose track of the LMB unplug progress.

This patch implements a new recover function spapr_recover_pending_dimm_state
that is used inside spapr_lmb_release to recover this DRC LMB release
status that is lost during the migration.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
[dwg: Minor stylistic changes, simplify error handling]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-25 11:31:33 +10:00
Daniel Henrique Barboza
a50919dddf hw/ppc: migrating the DRC state of hotplugged devices
In pseries, a firmware abstraction called Dynamic Reconfiguration
Connector (DRC) is used to assign a particular dynamic resource
to the guest and provide an interface to manage configuration/removal
of the resource associated with it. In other words, DRC is the
'plugged state' of a device.

Before this patch, DRC wasn't being migrated. This causes
post-migration problems due to DRC state mismatch between source and
target. The DRC state of a device X in the source might
change, while in the target the DRC state of X is still fresh. When
migrating the guest, X will not have the same hotplugged state as it
did in the source. This means that we can't hot unplug X in the
target after migration is completed because its DRC state is not consistent.
https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1677552 is one
bug that is caused by this DRC state mismatch between source and
target.

To migrate the DRC state, we defined the VMStateDescription struct for
spapr_drc to enable the transmission of spapr_drc state in migration.
Not all the elements in the DRC state are migrated - only those
that can be modified by guest actions or device add/remove
operations:

- 'isolation_state', 'allocation_state' and 'indicator_state'
are involved in the DR state transition diagram from
PAPR+ 2.7, 13.4;

- 'configured', 'signalled', 'awaiting_release' and 'awaiting_allocation'
are needed in attaching and detaching devices;

- 'indicator_state' provides users with hardware state information.

These are the DRC elements that are migrated.

In this patch the DRC state is migrated for PCI, LMB and CPU
connector types. At this moment there is no support to migrate
DRC for the PHB (PCI Host Bridge) type.

In the 'realize' function the DRC is registered using vmstate_register,
similar to what hw/ppc/spapr_iommu.c does in 'spapr_tce_table_realize'.
This approach works because  DRCs are bus-less and do not sit
on a BusClass that implements bc->get_dev_path, so as a fallback the
VMSD gets identified via "spapr_drc"/get_index(drc).

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-25 11:31:33 +10:00
Daniel Henrique Barboza
318347234d hw/ppc: removing drc->detach_cb and drc->detach_cb_opaque
The pointer drc->detach_cb is being used as a way of informing
the detach() function inside spapr_drc.c which cb to execute. This
information can also be retrieved simply by checking drc->type and
choosing the right callback based on it. In this context, detach_cb
is redundant information that must be managed.

After the previous spapr_lmb_release change, detach_cb_opaque is no
longer used by any of the three callback functions. This is
yet another piece of information that is now unused and, on top of that,
can't be migrated either.

This patch makes the following changes:

- removal of detach_cb_opaque. the 'opaque' argument was removed from
the callbacks and from the detach() function of sPAPRConnectorClass. The
attribute detach_cb_opaque of sPAPRConnector was removed.

- removal of detach_cb from the detach() call. The function pointer
detach_cb of sPAPRConnector was removed. detach() now uses a
switch(drc->type) to execute the appropriate callback. To achieve this,
spapr_core_release, spapr_lmb_release and spapr_phb_remove_pci_device_cb
callbacks were made public to be visible inside detach().

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-25 11:31:33 +10:00
David Gibson
0cffce56ae hw/ppc/spapr.c: adding pending_dimm_unplugs to sPAPRMachineState
The LMB DRC release callback, spapr_lmb_release(), uses an opaque
parameter, a sPAPRDIMMState struct that stores the current LMBs that
are allocated to a DIMM (nr_lmbs). After each call to this callback,
the nr_lmbs is decremented by one and, when it reaches zero, the callback
proceeds with the qdev calls to hot unplug the LMB.

Using drc->detach_cb_opaque is problematic because it can't be migrated in
the future DRC migration work. This patch makes the following changes to
eliminate the usage of this opaque callback inside spapr_lmb_release:

- sPAPRDIMMState was moved from spapr.c and added to spapr.h. A new
attribute called 'addr' was added to it. This is used as an unique
identifier to associate a sPAPRDIMMState to a PCDIMM element.

- sPAPRMachineState now hosts a new QTAILQ called 'pending_dimm_unplugs'.
This queue of sPAPRDIMMState elements will store the DIMM state of DIMMs
that are currently going under an unplug process.

- spapr_lmb_release() will now retrieve the nr_lmbs value by getting the
corresponding sPAPRDIMMState. A helper function called spapr_dimm_get_address
was created to fetch the address of a PCDIMM device inside spapr_lmb_release.
When nr_lmbs reaches zero and the callback proceeds with the qdev hot unplug
calls, the sPAPRDIMMState struct is removed from spapr->pending_dimm_unplugs.

After these changes, the opaque argument for spapr_lmb_release is now
unused and is passed as NULL inside spapr_del_lmbs. This and the other
opaque arguments can now be safely removed from the code.

As an additional cleanup made by this patch, the spapr_del_lmbs function
was merged with spapr_memory_unplug_request. The former was being called
only by the latter and both were small enough to fit one single function.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
[dwg: Minor stylistic cleanups]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-25 11:31:28 +10:00
Jeff Cody
223a23c198 block/gluster: glfs_lseek() workaround
On current released versions of glusterfs, glfs_lseek() will sometimes
return invalid values for SEEK_DATA or SEEK_HOLE.  For SEEK_DATA and
SEEK_HOLE, the returned value should be >= the passed offset, or < 0 in
the case of error:

LSEEK(2):

    off_t lseek(int fd, off_t offset, int whence);

    [...]

    SEEK_HOLE
              Adjust  the file offset to the next hole in the file greater
              than or equal to offset.  If offset points into the middle of
              a hole, then the file offset is set to offset.  If there is no
              hole past offset, then the file offset is adjusted to the end
              of the file (i.e., there is  an implicit hole at the end of
              any file).

    [...]

    RETURN VALUE
              Upon  successful  completion,  lseek()  returns  the resulting
              offset location as measured in bytes from the beginning of the
              file.  On error, the value (off_t) -1 is returned and errno is
              set to indicate the error

However, occasionally glfs_lseek() for SEEK_HOLE/DATA will return a
value less than the passed offset, yet greater than zero.

For instance, here are example values observed from this call:

    offs = glfs_lseek(s->fd, start, SEEK_HOLE);
    if (offs < 0) {
        return -errno;          /* D1 and (H3 or H4) */
    }

start == 7608336384
offs == 7607877632

This causes QEMU to abort on the assert test.  When this value is
returned, errno is also 0.

This is a reported and known bug to glusterfs:
https://bugzilla.redhat.com/show_bug.cgi?id=1425293

Although this is being fixed in gluster, we still should work around it
in QEMU, given that multiple released versions of gluster behave this
way.

This patch treats the return case of (offs < start) the same as if an
error value other than ENXIO is returned; we will assume we learned
nothing, and there are no holes in the file.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Message-id: 87c0140e9407c08f6e74b04131b610f2e27c014c.1495560397.git.jcody@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:44:46 -04:00
Paolo Bonzini
eb05e011e2 blockjob: use deferred_to_main_loop to indicate the coroutine has ended
All block jobs are using block_job_defer_to_main_loop as the final
step just before the coroutine terminates.  At this point,
block_job_enter should do nothing, but currently it restarts
the freed coroutine.

Now, the job->co states should probably be changed to an enum
(e.g. BEFORE_START, STARTED, YIELDED, COMPLETED) subsuming
block_job_started, job->deferred_to_main_loop and job->busy.
For now, this patch eliminates the problematic reenter by
removing the reset of job->deferred_to_main_loop (which served
no purpose, as far as I could see) and checking the flag in
block_job_enter.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170508141310.8674-12-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
4fb588e95b blockjob: reorganize block_job_completed_txn_abort
This splits the part that touches job states from the part that invokes
callbacks.  It will make the code simpler to understand once job states will
be protected by a different mutex than the AioContext lock.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170508141310.8674-11-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
7e74a73499 blockjob: strengthen a bit test-blockjob-txn
Unlike test-blockjob-txn, QMP releases the reference to the transaction
before the jobs finish.  Thus, qemu-iotest 124 showed a failure while
working on the next patch that the unit tests did not have.  Make
the test a little nastier.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170508141310.8674-10-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
c8ab5c2dde blockjob: group BlockJob transaction functions together
Yet another pure code movement patch, preparing for the next change.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170508141310.8674-9-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
4c241cf5d6 blockjob: introduce block_job_cancel_async, check iostatus invariants
The new function helps to respect the invariant that the coroutine
is entered with false user_resume, zero pause count and no error
recorded in the iostatus.

Resetting the iostatus is now common to all of block_job_cancel_async,
block_job_user_resume and block_job_iostatus_reset, albeit with slight
differences:

- block_job_cancel_async resets the iostatus, and resumes the job if
there was an error, but the coroutine is not restarted immediately.
For example the caller may continue with a call to block_job_finish_sync.

- block_job_user_resume resets the iostatus.  It wants to resume the job
unconditionally, even if there was no error.

- block_job_iostatus_reset doesn't resume the job at all.  Maybe that's
a bug but it should be fixed separately.

block_job_iostatus_reset does the least common denominator, so add some
checking but otherwise leave it as the entry point for resetting the
iostatus.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170508141310.8674-8-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
2caf63a903 blockjob: move iostatus reset inside block_job_user_resume
Outside blockjob.c, the block_job_iostatus_reset function is used once
in the monitor and once in BlockBackend.  When we introduce the block
job mutex, block_job_iostatus_reset's client is going to be the block
layer (for which blockjob.c will take the block job mutex) rather than
the monitor (which will take the block job mutex by itself).

The monitor's call to block_job_iostatus_reset comes
just before the sole call to block_job_user_resume, so reset the
iostatus directly from block_job_user_resume.  This will avoid
the need to introduce separate block_job_iostatus_reset and
block_job_iostatus_reset_locked APIs.

After making this change, move the function together with the others
that were moved in the previous patch.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170508141310.8674-7-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
88691b37f8 blockjob: separate monitor and blockjob APIs
We have two different headers for block job operations, blockjob.h
and blockjob_int.h.  The former contains APIs called by the monitor,
the latter contains APIs called by the block job drivers and the
block layer itself.

Keep the two APIs separate in the blockjob.c file too.  This will
be useful when transitioning away from the AioContext lock, because
there will be locking policies for the two categories, too---the
monitor will have to call new block_job_lock/unlock APIs, while blockjob
APIs will take care of this for the users.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170508141310.8674-6-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
f321dcb57f blockjob: introduce block_job_pause/resume_all
Remove use of block_job_pause/resume from outside blockjob.c, thus
making them static.  The new functions are used by the block layer,
so place them in blockjob_int.h.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170508141310.8674-5-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
05b0d8e3b8 blockjob: introduce block_job_early_fail
Outside blockjob.c, block_job_unref is only used when a block job fails
to start, and block_job_ref is not used at all.  The reference counting
thus is pretty well hidden.  Introduce a separate function to be used
by block jobs; because block_job_ref and block_job_unref now become
static, move them earlier in blockjob.c.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170508141310.8674-4-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
9f086abbe4 blockjob: remove iostatus_reset callback
This is unused since commit 66a0fae ("blockjob: Don't touch BDS iostatus",
2016-05-19).

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170508141310.8674-3-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Paolo Bonzini
6573d9c638 blockjob: remove unnecessary check
!job is always checked prior to the call, drop it from here.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20170508141310.8674-2-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-05-24 16:38:51 -04:00
Stefan Hajnoczi
e1fe27a208 Merge remote-tracking branch 'cohuck/tags/s390x-20170523' into staging
s390x updates:
- support for vfio-ccw to passthrough channel devices
- allow ccw bios to boot from scsi generic devices
- bugfix for initial reset

# gpg: Signature made Tue 23 May 2017 12:02:24 PM BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <conny@cornelia-huck.de>"
# gpg:                 aka "Cornelia Huck <cohuck@kernel.org>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# gpg:                 aka "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* cohuck/tags/s390x-20170523: (21 commits)
  s390/kvm: do not reset riccb on initial cpu reset
  MAINTAINERS: Add vfio-ccw maintainer
  vfio/ccw: update sense data if a unit check is pending
  s390x/css: ccw translation infrastructure
  s390x/css: introduce and realize ccw-request callback
  vfio/ccw: get irqs info and set the eventfd fd
  vfio/ccw: get io region info
  vfio/ccw: vfio based subchannel passthrough driver
  s390x/css: device support for s390-ccw passthrough
  s390x/css: realize css_create_sch
  s390x/css: realize css_sch_build_schib
  s390x/css: add s390-squash-mcss machine option
  linux-headers: update
  pc-bios/s390-ccw.img: rebuild image
  pc-bios/s390-ccw: Build a reasonable max_sectors limit
  pc-bios/s390-ccw: Get Block Limits VPD device data
  pc-bios/s390-ccw: Get list of supported VPD pages
  pc-bios/s390-ccw: Refactor scsi_inquiry function
  pc-bios/s390-ccw: Break up virtio-scsi read into multiples
  pc-bios/s390-ccw: Move SCSI block factor to outer read
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-24 13:53:17 +01:00
Laurent Vivier
c871bc70bb spapr: add pre_plug function for memory
This allows errors to be handled before the memory
has started to be hotplugged. We already have
the function for the CPU cores.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
[dwg: Fixed a couple of style nits]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 17:27:39 +10:00
David Gibson
459264ef24 pseries: Restore support for total vcpus not a multiple of threads-per-core for old machine types
As of pseries-2.7 and later, we require the total number of guest vcpus to
be a multiple of the threads-per-core.  pseries-2.6 and earlier machine
types, however, are supposed to allow this for the sake of migration from
old qemu versions which allowed this.

Unfortunately, 8149e29 "pseries: Enforce homogeneous threads-per-core"
broke this by not considering the old machine type case.  This fixes it by
only applying the check when the machine type supports hotpluggable cpus.
By not-entirely-coincidence, that corresponds to the same time when we
started enforcing total threads being a multiple of threads-per-core.

Fixes: 8149e2992f

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
2017-05-24 11:39:53 +10:00
David Gibson
80c33d343f pseries: Split CAS PVR negotiation out into a separate function
Guests of the qemu machine type go through a feature negotiation process
known as "client architecture support" (CAS) during early boot.  This does
a number of things, one of which is finding a CPU compatibility mode which
can be supported by both guest and host.

In fact the CPU negotiation is probably the single most complex part of the
CAS process, so this splits it out into a helper function.  We've recently
made some mistakes in maintaining backward compatibility for old machine
types here.  Splitting this out will also make it easier to fix this.

This also adds a possibly useful error message if the negotiation fails
(i.e. if there isn't a CPU mode that's suitable for both guest and host).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-05-24 11:39:53 +10:00
Greg Kurz
3d85885a1b spapr: fix error reporting in xics_system_init()
If the user explicitly asked for kernel-irqchip support and "xics-kvm"
initialization fails, we shouldn't fall back to emulated "xics" as we
do now. It is also awkward to print an error message when we have an
errp pointer argument.

Let's use the errp argument to report the error and let the caller decide.
This simplifies the code as we don't need a local Error * here.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:53 +10:00
Greg Kurz
249127d0df spapr_cpu_core: drop reference on ICP object during CPU realization
When a piece of code allocates an object, it implicitly gets a reference
on it. If it then makes that object a child property of another object, it
should drop its own reference at some point otherwise the child object can
never be finalized. The current code hence leaks one ICP object per CPU
when hot-removing a core.

Failing to add a newly allocated ICP object to the CPU is a bug. While here,
let's ensure QEMU aborts if this ever happens.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:53 +10:00
Daniel Henrique Barboza
bff3063837 hw/ppc/spapr_events.c: removing 'exception' from sPAPREventLogEntry
Currently we do not have any RTAS event that is reported by the
event-scan interface. The existing events, RTAS_LOG_TYPE_EPOW and
RTAS_LOG_TYPE_HOTPLUG, are being reported by the check-exception
interface and, as such, marked as 'exception=true'.

Commit 79853e18d9, 'spapr_events: event-scan RTAS interface', added
the event_scan interface because the guest kernel requires it to
initialize other required interfaces. It is acting since then as
a stub because no events that would be reported by it were added
since then. However, the existence of the 'exception' boolean adds
an unnecessary load in the future migration of the pending_events,
sPAPREventLogEntry QTAILQ that hosts the pending RTAS events.

To make the code cleaner and ease the future migration changes, this
patch makes the following changes:

- remove the 'exception' boolean that filters these events. There is
nothing to filter since all events are reported by check-exception;

- functions rtas_event_log_queue, rtas_event_log_dequeue and
rtas_event_log_contains don't receive the 'exception' boolean
as parameter;

- event_scan function was simplified. It was calling
'rtas_event_log_dequeue(mask, false)' that was always returning
'NULL' because we have no events that are created with
exception=false, thus in the end it would execute a jump to
'out_no_events' all the time. The function now assumes that
this will always be the case and all the remaining logic was
deleted.

In the future, when or if we add new RTAS events that should
be reported with the event_scan interface, we can refer to
the changes made in this patch to add the event_scan logic
back.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:53 +10:00
Greg Kurz
07572c0653 spapr: ensure core_slot isn't NULL in spapr_core_unplug()
If we go that far on the path of hot-removing a core and we find out that
the core-id is invalid, then we have a serious bug.

Let's make it explicit with an assert() instead of dereferencing a NULL
pointer.

This fixes Coverity issue CID 1375404.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:53 +10:00
Greg Kurz
de86eccc0c xics_kvm: cache already enabled vCPU ids
Since commit a45863bda9 ("xics_kvm: Don't enable KVM_CAP_IRQ_XICS if
already enabled"), we were able to re-hotplug a vCPU that had been hot-
unplugged earlier, thanks to a boolean flag in ICPState that we set when
enabling KVM_CAP_IRQ_XICS.

This could work because the lifecycle of all ICPState objects was the
same as the machine. Commit 5bc8d26de2 ("spapr: allocate the ICPState
object from under sPAPRCPUCore") broke this assumption and now we always
pass a freshly allocated ICPState object (ie, with the flag unset) to
icp_kvm_cpu_setup().

This causes re-hotplug to fail with:

Unable to connect CPU8 to kernel XICS: Device or resource busy

Let's fix this by caching all the vCPU ids for which KVM_CAP_IRQ_XICS was
enabled. This also drops the now useless boolean flag from ICPState.

Reported-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:52 +10:00
Bharata B Rao
06ec79e865 spapr: Consolidate HPT freeing code into a routine
Consolidate the code that frees HPT into a separate routine
spapr_free_hpt() as the same chunk of code is called from two places.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:52 +10:00
Greg Kurz
c8a98293f7 spapr-cpu-core: release ICP object when realization fails
While here we introduce a single error path to avoid code duplication.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:52 +10:00
Greg Kurz
175d2aa038 spapr: sanitize error handling in spapr_ics_create()
The spapr_ics_create() function handles errors in a rather convoluted
way, with two local Error * variables. Moreover, failing to parent the
ICS object to the machine should be considered as a bug but it is
currently ignored.

This patch addresses both issues.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:52 +10:00
Greg Kurz
f63ebfe0ac ppc/xics: simplify prototype of xics_spapr_init()
This function only does hypercall and RTAS-call registration, and thus
never returns an error. This patch adapts the prototype to reflect that.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:52 +10:00
Nikunj A Dadhania
a8b7373421 target/ppc: reset reservation in do_rfi()
For transitioning back to userspace after the interrupt.

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-24 11:39:52 +10:00
Stefan Hajnoczi
9964e96dc9 Merge remote-tracking branch 'jasowang/tags/net-pull-request' into staging
# gpg: Signature made Tue 23 May 2017 03:27:37 AM BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* jasowang/tags/net-pull-request:
  e1000e: Fix ICR "Other" causes clear logic
  net/filter-rewriter: Remove unused option in filter-rewriter
  net/filter-mirror.c: Rename filter_mirror_send() and fix codestyle
  net/filter-mirror.c: Remove duplicate check code.
  hmp / net: Mark host_net_add/remove as deprecated
  COLO-compare: Improve tcp compare trace event readability
  virtio-net: fix wild pointer when remove virtio-net queues
  net/dump: Issue a warning for the deprecated "-net dump"
  net/tap: Replace tap-haiku.c and tap-aix.c by a generic tap-stub.c

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-23 15:01:31 +01:00
Eduardo Habkost
8c1bc1e9d7 qapi-schema: Remove obsolete note from ObjectTypeInfo
The "This command is experimental" note in ObjectTypeInfo is obsolete
since 2012.  Commit 5192082097 removed the
warning from the qom-list-types command documentation, but we forgot to
remove the warning from ObjectTypeInfo.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170516205351.12101-1-ehabkost@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Eric Blake
579cf1d104 block: Use QDict helpers for --force-share
Fam's addition of --force-share in commits 459571f7 and 335e9937
were developed prior to the addition of QDict scalar insertion
macros, but merged after the general cleanup in commit 46f5ac20.
Patch created mechanically by rerunning:

 spatch --sp-file scripts/coccinelle/qobject.cocci \
        --macro-file scripts/cocci-macro-file.h --dir . --in-place

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170515195439.17677-1-eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Eric Blake
08fba7ac9b shutdown: Expose bool cause in SHUTDOWN and RESET events
Libvirt would like to be able to distinguish between a SHUTDOWN
event triggered solely by guest request and one triggered by a
SIGTERM or other action on the host.  While qemu_kill_report() was
already able to give different output to stderr based on whether a
shutdown was triggered by a host signal (but NOT by a host UI event,
such as clicking the X on the window), that information was then
lost to management.  The previous patches improved things to use an
enum throughout all callsites, so now we have something ready to
expose through QMP.

Note that for now, the decision was to expose ONLY a boolean,
rather than promoting ShutdownCause to a QAPI enum; this is because
libvirt has not expressed an interest in anything finer-grained.
We can still add additional details, in a backwards-compatible
manner, if a need later arises (if the addition happens before 2.10,
we can replace the bool with an enum; otherwise, the enum will have
to be in addition to the bool); this patch merely adds a helper
shutdown_caused_by_guest() to map the internal enum into the
external boolean.

Update expected iotest outputs to match the new data (complete
coverage of the affected tests is obtained by -raw, -qcow2, and -nbd).

Here is output from 'virsh qemu-monitor-event --loop' with the
patch installed:

event SHUTDOWN at 1492639680.731251 for domain fedora_13: {"guest":true}
event STOP at 1492639680.732116 for domain fedora_13: <null>
event SHUTDOWN at 1492639680.732830 for domain fedora_13: {"guest":false}

Note that libvirt runs qemu with -no-shutdown: the first SHUTDOWN event
was triggered by an action I took directly in the guest (shutdown -h),
at which point qemu stops the vcpus and waits for libvirt to do any
final cleanups; the second SHUTDOWN event is the result of libvirt
sending SIGTERM now that it has completed cleanup.  Libvirt is already
smart enough to only feed the first qemu SHUTDOWN event to the end user
(remember, virsh qemu-monitor-event is a low-level debugging interface
that is explicitly unsupported by libvirt, so it sees things that normal
end users do not); changing qemu to emit SHUTDOWN only once is outside
the scope of this series.

See also https://bugzilla.redhat.com/1384007

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170515214114.15442-6-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Eric Blake
cf83f14005 shutdown: Add source information to SHUTDOWN and RESET
Time to wire up all the call sites that request a shutdown or
reset to use the enum added in the previous patch.

It would have been less churn to keep the common case with no
arguments as meaning guest-triggered, and only modified the
host-triggered code paths, via a wrapper function, but then we'd
still have to audit that I didn't miss any host-triggered spots;
changing the signature forces us to double-check that I correctly
categorized all callers.

Since command line options can change whether a guest reset request
causes an actual reset vs. a shutdown, it's easy to also add the
information to reset requests.

Signed-off-by: Eric Blake <eblake@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au> [ppc parts]
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> [SPARC part]
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com> [s390x parts]
Message-Id: <20170515214114.15442-5-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Eric Blake
802f045a5f shutdown: Preserve shutdown cause through replay
With the recent addition of ShutdownCause, we want to be able to pass
a cause through any shutdown request, and then faithfully replay that
cause when later replaying the same sequence.  The easiest way is to
expand the replay event mechanism to track a series of values for
EVENT_SHUTDOWN, one corresponding to each value of ShutdownCause.

We are free to change the replay stream as needed, since there are
already no guarantees about being able to use a replay stream by
any other version of qemu than the one that generated it.

The cause is not actually fed back until the next patch changes the
signature for requesting a shutdown; a TODO marks that upcoming change.

Yes, this uses the gcc/clang extension of a ranged case label,
but this is not the first time we've used non-C99 constructs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <20170515214114.15442-4-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Eric Blake
aedbe19297 shutdown: Prepare for use of an enum in reset/shutdown_request
We want to track why a guest was shutdown; in particular, being able
to tell the difference between a guest request (such as ACPI request)
and host request (such as SIGINT) will prove useful to libvirt.
Since all requests eventually end up changing shutdown_requested in
vl.c, the logical change is to make that value track the reason,
rather than its current 0/1 contents.

Since command-line options control whether a reset request is turned
into a shutdown request instead, the same treatment is given to
reset_requested.

This patch adds an internal enum ShutdownCause that describes reasons
that a shutdown can be requested, and changes qemu_system_reset() to
pass the reason through, although for now nothing is actually changed
with regards to what gets reported.  The enum could be exported via
QAPI at a later date, if deemed necessary, but for now, there has not
been a request to expose that much detail to end clients.

For the most part, we turn 0 into SHUTDOWN_CAUSE_NONE, and 1 into
SHUTDOWN_CAUSE_HOST_ERROR; the only specific case where we have enough
information right now to use a different value is when we are reacting
to a host signal.  It will take a further patch to edit all call-sites
that can trigger a reset or shutdown request to properly pass in any
other reasons; this patch includes TODOs to point such places out.

qemu_system_reset() trades its 'bool report' parameter for a
'ShutdownCause reason', with all non-zero values having the same
effect; this lets us get rid of the weird #defines for VMRESET_*
as synonyms for bools.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170515214114.15442-3-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Eric Blake
7af88279e4 shutdown: Simplify shutdown_signal
There is no signal 0 (kill(pid, 0) has special semantics to probe whether
a process is alive, rather than actually sending a signal 0).  So we
can use the simpler 0, instead of -1, for our sentinel of whether a
shutdown request due to a signal has happened.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <20170515214114.15442-2-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Markus Armbruster
fc0f005958 sockets: Plug memory leak in socket_address_flatten()
socket_address_flatten() leaks a SocketAddress when its argument is
null.  Happens when opening a ChardevBackend of type 'udp' that is
configured without a local address.  Screwed up in commit bd269ebc due
to last minute semantic conflict resolution.  Spotted by Coverity.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1494866344-11013-1-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-05-23 13:28:17 +02:00
Greg Kurz
fe2f74af2b scripts/qmp/qom-set: fix the value argument passed to srv.command()
When invoking the script with -s, we end up passing a bogus value
to QEMU:

$ ./scripts/qmp/qom-set -s /var/tmp/qmp-sock-exp /machine.accel kvm
{}
$ ./scripts/qmp/qom-get -s /var/tmp/qmp-sock-exp /machine.accel
/var/tmp/qmp-sock-exp

This happens because sys.argv[2] isn't necessarily the command line
argument that holds the value. It is sys.argv[4] when -s was also
passed.

Actually, the code already has a variable to handle that. This patch
simply uses it.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <149373610338.5144.9635049015143453288.stgit@bahia.lan>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-05-23 13:28:17 +02:00
Sameeh Jubran
82342e91b6 e1000e: Fix ICR "Other" causes clear logic
This commit fixes a bug which causes the guest to hang. The bug was
observed upon a "receive overrun" (bit #6 of the ICR register)
interrupt which could be triggered post migration in a heavy traffic
environment. Even though the "receive overrun" bit (#6) is masked out
by the IMS register (refer to the log below) the driver still receives
an interrupt as the "receive overrun" bit (#6) causes the "Other" -
bit #24 of the ICR register - bit to be set as documented below. The
driver handles the interrupt and clears the "Other" bit (#24) but
doesn't clear the "receive overrun" bit (#6) which leads to an
infinite loop. Apparently the Windows driver expects that the "receive
overrun" bit and other ones - documented below - to be cleared when
the "Other" bit (#24) is cleared.

So to sum that up:
1. Bit #6 of the ICR register is set by heavy traffic
2. As a result of setting bit #6, bit #24 is set
3. The driver receives an interrupt for bit #24 (it doesn't receive an
   interrupt for bit #6 as it is masked out by IMS)
4. The driver handles and clears the interrupt of bit #24
5. Bit #6 is still set.
6. 2 happens all over again

The Interrupt Cause Read - ICR register:

The ICR has the "Other" bit - bit #24 - that is set when one or more
of the following ICR register's bits are set:

LSC - bit #2, RXO - bit #6, MDAC - bit #9, SRPD - bit #16, ACK - bit
#17, MNG - bit #18

This bug can occur with any of these bits depending on the driver's
behaviour and the way it configures the device. However, trying to
reproduce it with any bit other than RXO is challenging and ended in
failure as the drivers don't implement most of these bits; trying to
reproduce it with the LSC (Link Status Change - bit #2) bit didn't succeed
either, as it seems that Windows handles this bit differently.

Log sample of the storm:

27563@1494850819.411877:e1000e_irq_pending_interrupts ICR PENDING: 0x1000000 (ICR: 0x815000c2, IMS: 0x1a00004)
27563@1494850819.411900:e1000e_irq_pending_interrupts ICR PENDING: 0x0 (ICR: 0x815000c2, IMS: 0xa00004)
27563@1494850819.411915:e1000e_irq_pending_interrupts ICR PENDING: 0x0 (ICR: 0x815000c2, IMS: 0xa00004)
27563@1494850819.412380:e1000e_irq_pending_interrupts ICR PENDING: 0x0 (ICR: 0x815000c2, IMS: 0xa00004)
27563@1494850819.412395:e1000e_irq_pending_interrupts ICR PENDING: 0x0 (ICR: 0x815000c2, IMS: 0xa00004)
27563@1494850819.412436:e1000e_irq_pending_interrupts ICR PENDING: 0x0 (ICR: 0x815000c2, IMS: 0xa00004)
27563@1494850819.412441:e1000e_irq_pending_interrupts ICR PENDING: 0x0 (ICR: 0x815000c2, IMS: 0xa00004)
27563@1494850819.412998:e1000e_irq_pending_interrupts ICR PENDING: 0x1000000 (ICR: 0x815000c2, IMS: 0x1a00004)

* This bug behaviour wasn't observed with the Linux driver.

This commit solves:
https://bugzilla.redhat.com/show_bug.cgi?id=1447935
https://bugzilla.redhat.com/show_bug.cgi?id=1449490

Cc: qemu-stable@nongnu.org
Signed-off-by: Sameeh Jubran <sjubran@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Zhang Chen
61fcc16af6 net/filter-rewriter: Remove unused option in filter-rewriter
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Zhang Chen
e05dc4cf56 net/filter-mirror.c: Rename filter_mirror_send() and fix codestyle
Because filter_mirror_receive_iov() and filter_redirector_receive_iov()
both use filter_mirror_send() to send packets, rename
filter_mirror_send() to filter_send(), which is a more generic name.
Also fix some code style issues.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Zhang Chen
e2f8401638 net/filter-mirror.c: Remove duplicate check code.
The s->outdev has already been checked in filter_mirror_set_outdev().

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Thomas Huth
559964a1ad hmp / net: Mark host_net_add/remove as deprecated
The netdev_add and netdev_del commands should be used nowadays instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Zhang Chen
f583dca9ad COLO-compare: Improve tcp compare trace event readability
Because the previous patch's trace arguments exceeded the limit
of the UST backend, I rewrote the patch.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Yunjian Wang
f989c30cf8 virtio-net: fix wild pointer when remove virtio-net queues
The tx_bh or tx_timer will be freed in virtio_net_del_queue() function, when
removing virtio-net queues if the guest doesn't support multiqueue. But
it might be still referenced by virtio_net_set_status(), which needs to
be set NULL. And also the tx_waiting needs to be set zero to prevent
virtio_net_set_status() accessing tx_bh or tx_timer.

Cc: qemu-stable@nongnu.org
Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Thomas Huth
f5ab20a468 net/dump: Issue a warning for the deprecated "-net dump"
Network dumping should be done with "-object filter-dump" nowadays.
Using "-net dump" via the VLAN mechanism is considered as deprecated
and might be removed in a future release. So warn the users now
to inform them to use the filter-dump method instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Thomas Huth
4348300e75 net/tap: Replace tap-haiku.c and tap-aix.c by a generic tap-stub.c
The files tap-haiku.c and tap-aix.c are identical (except one line
of error message). We should avoid such code duplication, so replace
these by a generic tap-stub.c file instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-05-23 10:10:38 +08:00
Igor Mammedov
c6ff347c80 numa: Silence incomplete mapping warning under qtest
Silence "make check" warnings triggered by the numa/mon/cpus/partial
test case.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1495094971-177754-4-git-send-email-imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-22 14:24:52 -03:00
Stefan Hajnoczi
730a6e875b Merge remote-tracking branch 'mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Fri 19 May 2017 05:05:54 PM BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to 3ebaaa2 built from submodule.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-22 10:12:56 +01:00
Stefan Hajnoczi
0bb8cacd95 Merge remote-tracking branch 'kraxel/tags/pull-audio-20170519-1' into staging
audio: move & rename soundhw init code.

# gpg: Signature made Fri 19 May 2017 12:22:51 PM BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* kraxel/tags/pull-audio-20170519-1:
  audio: Rename hw/audio/audio.h to hw/audio/soundhw.h
  audio: Rename audio_init() to soundhw_init()
  audio: Move arch_init audio code to hw/audio/soundhw.c

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-19 16:54:14 +01:00
Mark Cave-Ayland
415c382483 Update OpenBIOS images to 3ebaaa2 built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-05-19 16:52:40 +01:00
Stefan Hajnoczi
a4657d4b91 Merge remote-tracking branch 'kraxel/tags/pull-ui-20170519-1' into staging
ui: egl-headless requires dmabuf support

# gpg: Signature made Fri 19 May 2017 09:46:40 AM BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* kraxel/tags/pull-ui-20170519-1:
  ui: egl-headless requires dmabuf support

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-19 16:44:26 +01:00
Stefan Hajnoczi
14c1f7deb4 Merge remote-tracking branch 'quintela/tags/migration/20170518' into staging
migration/next for 20170518

# gpg: Signature made Thu 18 May 2017 06:23:26 PM BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* quintela/tags/migration/20170518:
  migration: Make savevm.c target independent
  exec: Create include for target_page_size()
  migration: migration.h was not needed
  migration: Remove vmstate.h from migration.h
  migration: Remove qemu-file.h from vmstate.h
  migration: Split vmstate-types.c from vmstate.c
  migration: Move qjson.h to migration/
  migration: Remove migration.h from colo.h
  migration: Export qemu-file-channel.c functions in its own file
  migration: Split migration/channel.c for channel operations
  migration: Create migration/xbzrle.h
  block migration: Allow compile time disable
  migration: Remove old MigrationParams
  migration: Remove use of old MigrationParams
  migration: Create block capability
  hmp: Use visitor api for hmp_migrate_set_parameter()
  postcopy: Require RAMBlocks that are whole pages
  migration: Fix non-multiple of page size migration

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-05-19 16:43:46 +01:00
Christian Borntraeger
cb4f4bc353 s390/kvm: do not reset riccb on initial cpu reset
The riccb is kept unchanged during initial cpu reset. Move the data
structure to the other registers that are unchanged.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:31:28 +02:00
Dong Jia Shi
5eb74557cd MAINTAINERS: Add vfio-ccw maintainer
Add Cornelia Huck as the vfio-ccw maintainer.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-14-bjsdjshi@linux.vnet.ibm.com>
[CH: add tree]
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Dong Jia Shi
334e76850b vfio/ccw: update sense data if a unit check is pending
Concurrent-sense data is currently not delivered. This patch stores
the concurrent-sense data to the subchannel if a unit check is pending
and the concurrent-sense bit is enabled. Then a TSCH can retrieve the
right IRB data back to the guest.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-13-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Xiao Feng Ren
bab482d740 s390x/css: ccw translation infrastructure
Implement a basic infrastructure of handling channel I/O instruction
interception for passed through subchannels:
1. Branch the code path of instruction interception handling by
   SubChannel type.
2. For a passed-through subchannel, issue the ORB to kernel to do ccw
   translation and perform an I/O operation.
3. Assign different condition code based on the I/O result, or
   trigger a program check.

Signed-off-by: Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-12-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Xiao Feng Ren
8ca2b376b4 s390x/css: introduce and realize ccw-request callback
Introduce a new callback on subchannel to handle ccw-request.
Realize the callback in vfio-ccw device. Besides, resort to
the event notifier handler to handle the ccw-request results.
1. Pread the I/O results via MMIO region.
2. Update the scsw info to guest.
3. Inject an I/O interrupt to notify guest the I/O result.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-11-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Dong Jia Shi
4886b3e9f0 vfio/ccw: get irqs info and set the eventfd fd
vfio-ccw resorts to the eventfd mechanism to communicate with userspace.
We fetch the irqs info via the ioctl VFIO_DEVICE_GET_IRQ_INFO,
register an event notifier to get the eventfd fd which is sent
to kernel via the ioctl VFIO_DEVICE_SET_IRQS, then we can implement
read operation once kernel sends the signal.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-10-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Dong Jia Shi
c14e706ce9 vfio/ccw: get io region info
vfio-ccw provides an MMIO region for I/O operations. We fetch its
information via ioctls here, then we can use it performing I/O
instructions and retrieving I/O results later on.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-9-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Xiao Feng Ren
1dcac3e152 vfio/ccw: vfio based subchannel passthrough driver
We use the IOMMU_TYPE1 of VFIO to realize the subchannels
passthrough, implement a vfio based subchannels passthrough
driver called "vfio-ccw".

Support qemu parameters in the style of:
"-device vfio-ccw,sysfsdev=$mdev_file_path,devno=xx.x.xxxx"

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-8-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Dong Jia Shi
a8eac9431a s390x/css: device support for s390-ccw passthrough
In order to support subchannels pass-through, we introduce a s390
subchannel device called "s390-ccw" to hold the real subchannel info.
The s390-ccw devices inherit from the abstract CcwDevice which connect
to the existing virtual-css-bus.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-7-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Dong Jia Shi
817d4a6bc8 s390x/css: realize css_create_sch
The S390 virtual css support already has a mechanism to create a
virtual subchannel and provide it to the guest. However, to
pass-through subchannels to a guest, we need to introduce a new
mechanism to create the subchannel according to the real device
information. Thus we reconstruct css_create_virtual_sch to a new
css_create_sch function to handle all these cases and do allocation
and initialization of the subchannel according to the device type
and machine configuration.

Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-6-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Xiao Feng Ren
8f3cf0128c s390x/css: realize css_sch_build_schib
The S390 virtual css support already has a mechanism to build a
virtual subchannel information block (schib) and provide virtual
subchannels to the guest. However, to pass-through subchannels to
a guest, we need to introduce a new mechanism to build its schib
according to the real device information. Thus we realize a new css
sch_build_schib function to extract the path_masks, chpids, chpid
type from sysfs. To reuse the existing code, we refactor
css_add_virtual_chpid to css_add_chpid.

Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-5-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Xiao Feng Ren
274250c301 s390x/css: add s390-squash-mcss machine option
We want to support real (i.e. not virtual) channel devices
even for guests that do not support MCSS-E (where guests may
see devices from any channel subsystem image at once). As all
virtio-ccw devices are in css 0xfe (and show up in the default
css 0 for guests not activating MCSS-E), we need an option to
squash both the virtio subchannels and e.g. passed-through
subchannels from their real css (0-3, or 0 for hosts not
activating MCSS-E) into the default css. This will be
exploited in a later patch.

Signed-off-by: Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Message-Id: <20170517004813.58227-4-bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Cornelia Huck
74c98e20a6 linux-headers: update
Update against Linux v4.12-rc1.

Also include the new vfio_ccw.h header.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
f881bbdf72 pc-bios/s390-ccw.img: rebuild image
Contains the following commits:
- pc-bios/s390-ccw: Remove duplicate blk_factor adjustment
- pc-bios/s390-ccw: Move SCSI block factor to outer read
- pc-bios/s390-ccw: Break up virtio-scsi read into multiples
- pc-bios/s390-ccw: Refactor scsi_inquiry function
- pc-bios/s390-ccw: Get list of supported EVPD pages
- pc-bios/s390-ccw: Get Block Limits VPD device data
- pc-bios/s390-ccw: Build a reasonable max_sectors limit

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Message-Id: <20170510155359.32727-9-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
de4e3ae408 pc-bios/s390-ccw: Build a reasonable max_sectors limit
Now that we've read all the possible limits that have been defined for
a virtio-scsi controller and the disk we're booting from, it's possible
that we are STILL going to exceed the limits of the host device.
For example, a "-device scsi-generic" device does not support the
Block Limits VPD page.

So, let's fall back to something that seems to work for most boot
configurations if larger values were specified (including if nothing
was explicitly specified, and we took default values).

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Message-Id: <20170510155359.32727-8-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
fe921fc8b7 pc-bios/s390-ccw: Get Block Limits VPD device data
The "Block Limits" Inquiry VPD page is optional for any SCSI device,
but if it's supported it provides a hint of the maximum I/O transfer
length for this particular device. If this page is supported by the
disk, let's issue that Inquiry and use the minimum of it and the
SCSI controller limit. That will cover this scenario:

  qemu-system-s390x ...
    -device virtio-scsi-ccw,id=scsi0,max_sectors=32768 ...
    -drive file=/dev/sda,if=none,id=drive0,format=raw ...
    -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,
            drive=drive0,id=disk0,max_io_size=1048576

controller: 32768 sectors x 512 bytes/sector = 16777216 bytes
      disk:                                     1048576 bytes

Now that we have a limit for a virtio-scsi disk, compare that with the
limit for the virtio-scsi controller when we actually build the I/O.
The minimum of these two limits should be the one we use.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Message-Id: <20170510155359.32727-7-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
8edfe85bef pc-bios/s390-ccw: Get list of supported VPD pages
The "Supported Pages" Inquiry EVPD page is mandatory for all SCSI devices,
and is used as a gateway for what VPD pages the device actually supports.
Let's issue this Inquiry, and dump that list with the debug facility.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Message-Id: <20170510155359.32727-6-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
9c12359c57 pc-bios/s390-ccw: Refactor scsi_inquiry function
If we want to issue any of the SCSI Inquiry EVPD pages,
which we do, we could use this function to issue both types
of commands with a little bit of refactoring.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Message-Id: <20170510155359.32727-5-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
5ffd4a3c2d pc-bios/s390-ccw: Break up virtio-scsi read into multiples
A virtio-scsi request that goes through the host sd driver and exceeds
the maximum transfer size is automatically broken up for us.  But the
equivalent request going to the sg driver presumes that any length
requirements have already been honored.

Let's use the max_sectors field on the virtio-scsi controller device,
and break up all requests (both sd and sg) to avoid this problem.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Message-Id: <20170510155359.32727-4-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
98d3c52435 pc-bios/s390-ccw: Move SCSI block factor to outer read
Simple refactoring so that the blk_factor adjustment is
moved into virtio_scsi_read_many routine, in preparation
for another change.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Message-Id: <20170510155359.32727-3-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eric Farman
77c76392b0 pc-bios/s390-ccw: Remove duplicate blk_factor adjustment
When using virtio-scsi, we multiply the READ(10) data_size by
a block factor twice when building the I/O.  This is fine,
since it's only 1 for SCSI disks, but let's clean it up.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20170510155359.32727-2-farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-05-19 12:29:01 +02:00
Eduardo Habkost
8a824e4d74 audio: Rename hw/audio/audio.h to hw/audio/soundhw.h
All the functions in hw/audio/audio.h are called "soundhw_*()"
and live in hw/audio/soundhw.c. Rename the header file for
consistency.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Hervé Poussineau <hpoussin@reactos.org>
Message-id: 20170508205735.23444-4-ehabkost@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-05-19 10:48:54 +02:00
Eduardo Habkost
4c565674a2 audio: Rename audio_init() to soundhw_init()
To make it consistent with the remaining soundhw.c functions and
avoid confusion with the audio_init() function in audio/audio.c,
rename audio_init() to soundhw_init().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Message-id: 20170508205735.23444-3-ehabkost@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-05-19 10:48:53 +02:00
Eduardo Habkost
ca89f72092 audio: Move arch_init audio code to hw/audio/soundhw.c
There's no reason to keep the soundhw table in arch_init.c. Move
that code to a new hw/audio/soundhw.c file.

While moving the code, trivial coding style issues were fixed.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170508205735.23444-2-ehabkost@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-05-19 10:48:53 +02:00
Juan Quintela
46d702b106 migration: Make savevm.c target independent
It only needed TARGET_PAGE_SIZE/BITS/BITS_MIN values, so just export
them from exec.h

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-05-18 19:21:00 +02:00
Juan Quintela
51180423a2 exec: Create include for target_page_size()
That is the only function that we need from exec.c, and until now we
had to include the whole sysemu.h for it.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

---

/me learns to be less sloppy with copyright notices
thanks Dave
2017-05-18 19:20:59 +02:00
Juan Quintela
68ba3b0743 migration: migration.h was not needed
These files don't use any function from migration.h, so drop it.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-05-18 19:20:59 +02:00
Juan Quintela
987772d9e7 migration: Remove vmstate.h from migration.h
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

---

Minor rearrangements due to rebase
2017-05-18 19:20:59 +02:00
Juan Quintela
82b9d0f06a migration: Remove qemu-file.h from vmstate.h
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

--

minor rearrangements due to the rebase
2017-05-18 19:20:59 +02:00
Juan Quintela
576d1abc20 migration: Split vmstate-types.c from vmstate.c
Now one just has the interpreter, and the other has the basic types.
Once there, add copyright boilerplate.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>

--

Use GPL v2 or later.  Detected by David.
2017-05-18 19:20:59 +02:00
Juan Quintela
05b98c22f8 migration: Move qjson.h to migration/
It is only used for migration code.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-05-18 19:20:59 +02:00
Juan Quintela
c59be019e9 migration: Remove migration.h from colo.h
migration.h is not included in any includes now.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-05-18 19:20:59 +02:00
Juan Quintela
40014d81f2 migration: Export qemu-file-channel.c functions in its own file
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-05-18 19:20:50 +02:00
Juan Quintela
dd4339c540 migration: Split migration/channel.c for channel operations
Create an include for its exported functions.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

---
Add proper header
2017-05-18 19:20:24 +02:00
Juan Quintela
709e3fe825 migration: Create migration/xbzrle.h
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-05-18 18:04:54 +02:00
Dr. David Alan Gilbert
ed1701c6a5 block migration: Allow compile time disable
Many users now prefer to use drive_mirror over NBD as an
alternative to the older migrate -b option; drive_mirror is
more complex to setup but gives you more options (e.g. only
migrating some of the disks if some of them are shared).

Allow the large chunk of block migration code to be compiled
out for those who don't use it.

Based on a downstream-patch we've had for a while by Jeff Cody.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>

--

- When compiled out, allow setting block only with false value (eric)
2017-05-18 18:04:54 +02:00
Juan Quintela
a0762d9e34 migration: Remove old MigrationParams
Not used anymore after moving block migration to use capabilities.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2017-05-18 18:04:54 +02:00
Juan Quintela
ce7c817c85 migration: Remove use of old MigrationParams
We changed in the previous patch to use migration capabilities for
it.  Notice that we continue using the old command line flags from the
migrate command for the time being.  Remove the set_params method as
it is now empty.

For savevm, one can't do a:

savevm -b/-i foo

but now one can do:

migrate_set_capability block on
savevm foo

And we can't use block migration. We could disable block capability
unconditionally, but it would not be much better.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>

---
- Maintain shared/enabled dependency (Xu suggestion)
- Now we maintain the dependency on the setter functions
- improve error messages
2017-05-18 18:04:54 +02:00
Juan Quintela
2833c59b94 migration: Create block capability
Create one capability for block migration and one parameter for
incremental block migration.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>

---

- address all Markus comments
- use Markus and Eric text descriptions
- change logic another time
- improve text messages
2017-05-18 18:04:54 +02:00
Juan Quintela
f4a06d1391 hmp: Use visitor api for hmp_migrate_set_parameter()
We only use it for int64 at this point, I am not able to find a way to
parse an int with MiB units.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-05-18 18:04:54 +02:00
Dr. David Alan Gilbert
5d214a92ac postcopy: Require RAMBlocks that are whole pages
It turns out that it's legal to create a VM with RAMBlocks that aren't
a multiple of the pagesize in use; e.g. a 1025M main memory using
2M host pages.  That breaks postcopy's atomic placement of pages,
so disallow it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-05-18 18:04:53 +02:00
Dr. David Alan Gilbert
1eb3fc0a0b migration: Fix non-multiple of page size migration
Unfortunately it's legal to create a VM with a RAM size that's
not a multiple of the underlying host page or huge page size.
Recently I'd changed things to always send host sized pages,
and that breaks if we have say a 1025MB guest on 2MB hugepages.

Unfortunately we can't just make that illegal since it would break
migration from/to existing oddly configured VMs.

Symptom: qemu-system-x86_64: Illegal RAM offset 40100000
     as it transmits the fraction of the hugepage after the end
     of the RAMBlock (may also cause a crash on the source
     - possibly due to clearing bits after the bitmap)

Reported-by:  Yumei Huang <yuhuang@redhat.com>
Red Hat bug: https://bugzilla.redhat.com/show_bug.cgi?id=1449037

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-05-18 18:04:53 +02:00
1273 changed files with 54326 additions and 21920 deletions

8
.gdbinit Normal file
View File

@@ -0,0 +1,8 @@
# GDB may have ./.gdbinit loading disabled by default. In that case you can
# follow the instructions it prints. They boil down to adding the following to
# your home directory's ~/.gdbinit file:
#
# add-auto-load-safe-path /path/to/qemu/.gdbinit
# Load QEMU-specific sub-commands and settings
source scripts/qemu-gdb.py

17
.gitignore vendored
View File

@@ -50,6 +50,7 @@
/qemu-version.h.tmp
/module_block.h
/vscclient
/vhost-user-scsi
/fsdev/virtfs-proxy-helper
*.[1-9]
*.a
@@ -99,14 +100,14 @@
/pc-bios/optionrom/kvmvapic.img
/pc-bios/s390-ccw/s390-ccw.elf
/pc-bios/s390-ccw/s390-ccw.img
/docs/qemu-ga-qapi.texi
/docs/qemu-ga-ref.html
/docs/qemu-ga-ref.info*
/docs/qemu-ga-ref.txt
/docs/qemu-qmp-qapi.texi
/docs/qemu-qmp-ref.html
/docs/qemu-qmp-ref.info*
/docs/qemu-qmp-ref.txt
/docs/interop/qemu-ga-qapi.texi
/docs/interop/qemu-ga-ref.html
/docs/interop/qemu-ga-ref.info*
/docs/interop/qemu-ga-ref.txt
/docs/interop/qemu-qmp-qapi.texi
/docs/interop/qemu-qmp-ref.html
/docs/interop/qemu-qmp-ref.info*
/docs/interop/qemu-qmp-ref.txt
/docs/version.texi
*.tps
.stgit-*

View File

@@ -1,15 +1,22 @@
language: c
git:
submodules: false
env:
global:
- LC_ALL=C
matrix:
- IMAGE=debian-armhf-cross
TARGET_LIST=arm-softmmu,arm-linux-user
TARGET_LIST=arm-softmmu,arm-linux-user,armeb-linux-user
- IMAGE=debian-arm64-cross
TARGET_LIST=aarch64-softmmu,aarch64-linux-user
- IMAGE=debian-s390x-cross
TARGET_LIST=s390x-softmmu,s390x-linux-user
# mips64el-softmmu disabled due to libfdt problem
- IMAGE=debian-mipsel-cross
TARGET_LIST=mipsel-softmmu,mipsel-linux-user,mips64el-linux-user
build:
pre_ci:
- make docker-image-${IMAGE}
- make docker-image-${IMAGE} V=1
pre_ci_boot:
image_name: qemu
image_tag: ${IMAGE}
@@ -17,5 +24,13 @@ build:
options: "-e HOME=/root"
ci:
- unset CC
# some targets require newer up to date packages, for example TARGET_LIST matching
# aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
# see the configure script:
# error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:"
# " (1) Preferred: Install the DTC (libfdt) devel package"
# " (2) Fetch the DTC submodule, using:"
# " git submodule update --init dtc"
- dpkg --compare-versions `dpkg-query --showformat='${Version}' --show libfdt-dev` ge 1.4.2 || git submodule update --init dtc
- ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
- make -j2
- make -j$(($(getconf _NPROCESSORS_ONLN) + 1))

View File

@@ -86,6 +86,9 @@ matrix:
- env: CONFIG="--enable-trace-backends=ust"
TEST_CMD=""
compiler: gcc
- env: CONFIG="--disable-tcg"
TEST_CMD=""
compiler: gcc
- env: CONFIG=""
os: osx
compiler: clang

View File

@@ -84,14 +84,10 @@ M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: cpu-exec.c
F: cpu-exec-common.c
F: cpus.c
F: cputlb.c
F: exec.c
F: softmmu_template.h
F: translate-all.*
F: translate-common.c
F: accel/tcg/
F: include/exec/cpu*.h
F: include/exec/exec-all.h
F: include/exec/helper*.h
@@ -277,8 +273,8 @@ Overall
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
S: Supported
F: kvm-*
F: */kvm.*
F: accel/kvm/
F: include/sysemu/kvm*.h
ARM
@@ -299,7 +295,7 @@ F: target/ppc/kvm.c
S390
M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Cornelia Huck <cohuck@redhat.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: target/s390x/kvm.c
@@ -327,7 +323,6 @@ M: Stefano Stabellini <sstabellini@kernel.org>
M: Anthony Perard <anthony.perard@citrix.com>
L: xen-devel@lists.xenproject.org
S: Supported
F: xen-*
F: */xen*
F: hw/9pfs/xen-9p-backend.c
F: hw/char/xen_console.c
@@ -380,7 +375,7 @@ F: hw/*/allwinner*
F: include/hw/*/allwinner*
F: hw/arm/cubieboard.c
ARM PrimeCell
ARM PrimeCell and CMSDK devices
M: Peter Maydell <peter.maydell@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
@@ -394,6 +389,10 @@ F: hw/intc/pl190.c
F: hw/sd/pl181.c
F: hw/timer/pl031.c
F: include/hw/arm/primecell.h
F: hw/timer/cmsdk-apb-timer.c
F: include/hw/timer/cmsdk-apb-timer.h
F: hw/char/cmsdk-apb-uart.c
F: include/hw/char/cmsdk-apb-uart.h
ARM cores
M: Peter Maydell <peter.maydell@linaro.org>
@@ -455,6 +454,14 @@ S: Maintained
F: hw/arm/integratorcp.c
F: hw/misc/arm_integrator_debug.c
MPS2
M: Peter Maydell <peter.maydell@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/mps2.c
F: hw/misc/mps2-scc.c
F: include/hw/misc/mps2-scc.h
Musicpal
M: Jan Kiszka <jan.kiszka@web.de>
L: qemu-arm@nongnu.org
@@ -778,7 +785,7 @@ F: include/hw/sparc/grlib.h
S390 Machines
-------------
S390 Virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Cornelia Huck <cohuck@redhat.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Alexander Graf <agraf@suse.de>
S: Supported
@@ -1005,6 +1012,14 @@ S: Supported
F: hw/vfio/*
F: include/hw/vfio/
vfio-ccw
M: Cornelia Huck <cohuck@redhat.com>
S: Supported
F: hw/vfio/ccw.c
F: hw/s390x/s390-ccw.c
F: include/hw/s390x/s390-ccw.h
T: git git://github.com/cohuck/qemu.git s390-next
vhost
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
@@ -1040,7 +1055,7 @@ F: tests/virtio-blk-test.c
T: git git://github.com/stefanha/qemu.git block
virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Cornelia Huck <cohuck@redhat.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
S: Supported
F: hw/s390x/virtio-ccw.[hc]
@@ -1152,6 +1167,13 @@ F: docs/specs/vmgenid.txt
F: tests/vmgenid-test.c
F: stubs/vmgenid.c
Unimplemented device
M: Peter Maydell <peter.maydell@linaro.org>
R: Philippe Mathieu-Daudé <f4bug@amsat.org>
S: Maintained
F: include/hw/misc/unimp.h
F: hw/misc/unimp.c
Subsystems
----------
Audio
@@ -1230,13 +1252,12 @@ M: Paolo Bonzini <pbonzini@redhat.com>
M: Marc-André Lureau <marcandre.lureau@redhat.com>
S: Maintained
F: chardev/
F: backends/msmouse.c
F: backends/testdev.c
F: include/chardev/
Character Devices (Braille)
M: Samuel Thibault <samuel.thibault@ens-lyon.org>
S: Maintained
F: backends/baum.c
F: chardev/baum.c
Command line option argument parsing
M: Markus Armbruster <armbru@redhat.com>
@@ -1341,15 +1362,6 @@ W: http://info.iet.unipi.it/~luigi/netmap/
S: Maintained
F: net/netmap.c
Network Block Device (NBD)
M: Paolo Bonzini <pbonzini@redhat.com>
S: Odd Fixes
F: block/nbd*
F: nbd/
F: include/block/nbd*
F: qemu-nbd.c
T: git git://github.com/bonzini/qemu.git nbd-next
NUMA
M: Eduardo Habkost <ehabkost@redhat.com>
S: Maintained
@@ -1404,8 +1416,7 @@ F: include/qapi/qmp/
X: include/qapi/qmp/dispatch.h
F: scripts/coccinelle/qobject.cocci
F: tests/check-qdict.c
F: tests/check-qfloat.c
F: tests/check-qint.c
F: tests/check-qnum.c
F: tests/check-qjson.c
F: tests/check-qlist.c
F: tests/check-qstring.c
@@ -1483,7 +1494,7 @@ F: tests/vmstate-static-checker-data/
F: docs/migration.txt
Seccomp
M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
M: Eduardo Otubo <otubo@redhat.com>
S: Supported
F: qemu-seccomp.c
F: include/sysemu/seccomp.h
@@ -1653,7 +1664,7 @@ TCI target
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: tcg/tci/
F: tci.c
F: tcg/tci.c
F: disas/tci.c
Block drivers
@@ -1704,6 +1715,18 @@ S: Supported
F: block/iscsi.c
F: block/iscsi-opts.c
Network Block Device (NBD)
M: Eric Blake <eblake@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: qemu-block@nongnu.org
S: Maintained
F: block/nbd*
F: nbd/
F: include/block/nbd*
F: qemu-nbd.*
F: blockdev-nbd.c
T: git git://repo.or.cz/qemu/ericb.git nbd
NFS
M: Jeff Cody <jcody@redhat.com>
M: Peter Lieven <pl@kamp.de>
@@ -1851,12 +1874,14 @@ Build and test automation
-------------------------
M: Alex Bennée <alex.bennee@linaro.org>
M: Fam Zheng <famz@redhat.com>
R: Philippe Mathieu-Daudé <f4bug@amsat.org>
L: qemu-devel@nongnu.org
S: Maintained
F: .travis.yml
F: .shippable.yml
F: tests/docker/
W: https://travis-ci.org/qemu/qemu
W: https://app.shippable.com/github/qemu/qemu
W: http://patchew.org/QEMU/
Documentation

View File

@@ -207,8 +207,8 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
ifdef BUILD_DOCS
DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
DOCS+=docs/qemu-qmp-ref.html docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7
DOCS+=docs/qemu-ga-ref.html docs/qemu-ga-ref.txt docs/qemu-ga-ref.7
DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@@ -269,6 +269,7 @@ dummy := $(call unnest-vars,, \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
libvhost-user-obj-y \
vhost-user-scsi-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
@@ -473,6 +474,8 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y)
$(call LINK, $^)
module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
$(call quiet-command,$(PYTHON) $< $@ \
@@ -519,11 +522,12 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
rm -f docs/qemu-ga-qapi.texi docs/qemu-qmp-qapi.texi docs/version.texi
rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
rm -f docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
rm -f docs/version.texi
rm -f docs/interop/qemu-ga-qapi.texi docs/interop/qemu-qmp-qapi.texi
rm -f docs/interop/qemu-qmp-ref.7 docs/interop/qemu-ga-ref.7
rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
for d in $(TARGET_DIRS); do \
rm -rf $$d || exit 1 ; \
done
@@ -549,7 +553,7 @@ efi-e1000e.rom efi-vmxnet3.rom \
qemu-icon.bmp qemu_logo_no_text.svg \
bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
s390-ccw.img \
s390-ccw.img s390-netboot.img \
spapr-rtas.bin slof.bin skiboot.lid \
palcode-clipper \
u-boot.e500 \
@@ -562,13 +566,13 @@ install-doc: $(DOCS)
$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
ifdef CONFIG_POSIX
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
$(INSTALL_DATA) docs/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
ifneq ($(TOOLS),)
$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -576,9 +580,9 @@ ifneq ($(TOOLS),)
endif
ifneq (,$(findstring qemu-ga,$(TOOLS)))
$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
$(INSTALL_DATA) docs/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
$(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
endif
endif
ifdef CONFIG_VIRTFS
@@ -666,28 +670,27 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
# documentation
MAKEINFO=makeinfo
MAKEINFOFLAGS=--no-split --number-sections -I docs
TEXIFLAG=$(if $(V),,--quiet)
MAKEINFOINCLUDES= -I docs -I $(<D) -I $(@D)
MAKEINFOFLAGS=--no-split --number-sections $(MAKEINFOINCLUDES)
TEXI2PODFLAGS=$(MAKEINFOINCLUDES) "-DVERSION=$(VERSION)"
TEXI2PDFFLAGS=$(if $(V),,--quiet) -I $(SRC_PATH) $(MAKEINFOINCLUDES)
docs/version.texi: $(SRC_PATH)/VERSION
$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
%.html: %.texi
%.html: %.texi docs/version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--html $< -o $@,"GEN","$@")
%.info: %.texi
%.info: %.texi docs/version.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
%.txt: %.texi
%.txt: %.texi docs/version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--plaintext $< -o $@,"GEN","$@")
%.pdf: %.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I docs $< -o $@,"GEN","$@")
docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.txt docs/qemu-ga-ref.pdf docs/qemu-ga-ref.7.pod: docs/version.texi
docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.pod: docs/version.texi
%.pdf: %.texi docs/version.texi
$(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@")
qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -701,12 +704,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
docs/interop/qemu-qmp-qapi.texi docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
docs/qemu-qmp-qapi.texi: $(qapi-modules)
docs/interop/qemu-qmp-qapi.texi: $(qapi-modules)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
@@ -716,21 +719,25 @@ fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
qemu-ga.8: qemu-ga.texi
html: qemu-doc.html docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
info: qemu-doc.info docs/qemu-qmp-ref.info docs/qemu-ga-ref.info
pdf: qemu-doc.pdf docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
txt: qemu-doc.txt docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info
pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
qemu-monitor-info.texi
docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \
docs/qemu-ga-ref.texi docs/qemu-ga-qapi.texi
docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \
docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \
docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7: \
docs/interop/qemu-ga-ref.texi docs/interop/qemu-ga-qapi.texi
docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \
docs/qemu-qmp-ref.texi docs/qemu-qmp-qapi.texi
docs/interop/qemu-qmp-ref.dvi docs/interop/qemu-qmp-ref.html \
docs/interop/qemu-qmp-ref.info docs/interop/qemu-qmp-ref.pdf \
docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7: \
docs/interop/qemu-qmp-ref.texi docs/interop/qemu-qmp-qapi.texi
ifdef CONFIG_WIN32
@@ -791,9 +798,11 @@ endif # CONFIG_WIN
# Add a dependency on the generated files, so that they are always
# rebuilt before other object files
ifneq ($(wildcard config-host.mak),)
ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
Makefile: $(GENERATED_FILES)
endif
endif
.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \

View File

@@ -40,7 +40,7 @@ io-obj-y = io/
ifeq ($(CONFIG_SOFTMMU),y)
common-obj-y = blockdev.o blockdev-nbd.o block/
common-obj-y += iothread.o
common-obj-y += bootdevice.o iothread.o
common-obj-y += net/
common-obj-y += qdev-monitor.o device-hotplug.o
common-obj-$(CONFIG_WIN32) += os-win32.o
@@ -50,11 +50,8 @@ common-obj-$(CONFIG_LINUX) += fsdev/
common-obj-y += migration/
common-obj-$(CONFIG_SPICE) += spice-qemu-char.o
common-obj-y += audio/
common-obj-y += hw/
common-obj-y += accel.o
common-obj-y += replay/
@@ -70,6 +67,7 @@ common-obj-y += tpm.o
common-obj-$(CONFIG_SLIRP) += slirp/
common-obj-y += backends/
common-obj-y += chardev/
common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o
@@ -112,6 +110,10 @@ qga-vss-dll-obj-y = qga/
ivshmem-client-obj-y = contrib/ivshmem-client/
ivshmem-server-obj-y = contrib/ivshmem-server/
libvhost-user-obj-y = contrib/libvhost-user/
vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o
######################################################################
trace-events-subdirs =
@@ -120,7 +122,7 @@ trace-events-subdirs += crypto
trace-events-subdirs += io
trace-events-subdirs += migration
trace-events-subdirs += block
trace-events-subdirs += backends
trace-events-subdirs += chardev
trace-events-subdirs += hw/block
trace-events-subdirs += hw/block/dataplane
trace-events-subdirs += hw/char
@@ -163,6 +165,9 @@ trace-events-subdirs += target/ppc
trace-events-subdirs += qom
trace-events-subdirs += linux-user
trace-events-subdirs += qapi
trace-events-subdirs += accel/tcg
trace-events-subdirs += accel/kvm
trace-events-subdirs += nbd
trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)

View File

@@ -88,20 +88,17 @@ all: $(PROGS) stap
#########################################################
# cpu emulator library
obj-y = exec.o translate-all.o cpu-exec.o
obj-y += translate-common.o
obj-y += cpu-exec-common.o
obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
obj-$(CONFIG_TCG_INTERPRETER) += tci.o
obj-y += tcg/tcg-common.o
obj-y += exec.o
obj-y += accel/
obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/tcg-runtime.o
obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target/$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-y += tcg-runtime.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
@@ -140,13 +137,12 @@ endif #CONFIG_BSD_USER
# System emulator target
ifdef CONFIG_SOFTMMU
obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
obj-y += qtest.o bootdevice.o
obj-y += qtest.o
obj-y += hw/
obj-$(CONFIG_KVM) += kvm-all.o
obj-y += memory.o cputlb.o
obj-y += memory.o
obj-y += memory_mapping.o
obj-y += dump.o
obj-y += migration/ram.o migration/savevm.o
obj-y += migration/ram.o
LIBS := $(libs_softmmu) $(LIBS)
# Hardware support

4
accel/Makefile.objs Normal file
View File

@@ -0,0 +1,4 @@
obj-$(CONFIG_SOFTMMU) += accel.o
obj-y += kvm/
obj-$(CONFIG_TCG) += tcg/
obj-y += stubs/

View File

@@ -34,15 +34,6 @@
#include "hw/xen/xen.h"
#include "qom/object.h"
int tcg_tb_size;
static bool tcg_allowed = true;
static int tcg_init(MachineState *ms)
{
tcg_exec_init(tcg_tb_size * 1024 * 1024);
return 0;
}
static const TypeInfo accel_type = {
.name = TYPE_ACCEL,
.parent = TYPE_OBJECT,
@@ -129,27 +120,15 @@ void configure_accelerator(MachineState *ms)
}
}
static void tcg_accel_class_init(ObjectClass *oc, void *data)
void accel_register_compat_props(AccelState *accel)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "tcg";
ac->init_machine = tcg_init;
ac->allowed = &tcg_allowed;
AccelClass *class = ACCEL_GET_CLASS(accel);
register_compat_props_array(class->global_props);
}
#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
static const TypeInfo tcg_accel_type = {
.name = TYPE_TCG_ACCEL,
.parent = TYPE_ACCEL,
.class_init = tcg_accel_class_init,
};
static void register_accel_types(void)
{
type_register_static(&accel_type);
type_register_static(&tcg_accel_type);
}
type_init(register_accel_types);

1
accel/kvm/Makefile.objs Normal file
View File

@@ -0,0 +1 @@
obj-$(CONFIG_KVM) += kvm-all.o

View File

@@ -23,6 +23,7 @@
#include "qemu/option.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "hw/hw.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
@@ -35,7 +36,7 @@
#include "exec/ram_addr.h"
#include "exec/address-spaces.h"
#include "qemu/event_notifier.h"
#include "trace-root.h"
#include "trace.h"
#include "hw/irq.h"
#include "hw/boards.h"
@@ -317,7 +318,7 @@ int kvm_init_vcpu(CPUState *cpu)
cpu->kvm_fd = ret;
cpu->kvm_state = s;
cpu->kvm_vcpu_dirty = true;
cpu->vcpu_dirty = true;
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
@@ -980,15 +981,6 @@ static MemoryListener kvm_io_listener = {
.priority = 10,
};
static void kvm_handle_interrupt(CPUState *cpu, int mask)
{
cpu->interrupt_request |= mask;
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
}
}
int kvm_set_irq(KVMState *s, int irq, int level)
{
struct kvm_irq_level event;
@@ -1144,6 +1136,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
}
clear_gsi(s, virq);
kvm_arch_release_virq_post(virq);
trace_kvm_irqchip_release_virq(virq);
}
static unsigned int kvm_hash_msi(uint32_t data)
@@ -1287,7 +1280,8 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
return -EINVAL;
}
trace_kvm_irqchip_add_msi_route(virq);
trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A",
vector, virq);
kvm_add_routing_entry(s, &kroute);
kvm_arch_add_msi_route_post(&kroute, vector, dev);
@@ -1746,6 +1740,8 @@ static int kvm_init(MachineState *ms)
kvm_ioeventfd_any_length_allowed =
(kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);
kvm_state = s;
ret = kvm_arch_init(ms, s);
if (ret < 0) {
goto err;
@@ -1755,8 +1751,6 @@ static int kvm_init(MachineState *ms)
kvm_irqchip_create(ms, s);
}
kvm_state = s;
if (kvm_eventfds_allowed) {
s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
@@ -1771,8 +1765,6 @@ static int kvm_init(MachineState *ms)
s->many_ioeventfds = kvm_check_many_ioeventfds();
cpu_interrupt_handler = kvm_handle_interrupt;
return 0;
err:
@@ -1861,15 +1853,15 @@ void kvm_flush_coalesced_mmio_buffer(void)
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
if (!cpu->kvm_vcpu_dirty) {
if (!cpu->vcpu_dirty) {
kvm_arch_get_registers(cpu);
cpu->kvm_vcpu_dirty = true;
cpu->vcpu_dirty = true;
}
}
void kvm_cpu_synchronize_state(CPUState *cpu)
{
if (!cpu->kvm_vcpu_dirty) {
if (!cpu->vcpu_dirty) {
run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -1877,7 +1869,7 @@ void kvm_cpu_synchronize_state(CPUState *cpu)
static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
cpu->kvm_vcpu_dirty = false;
cpu->vcpu_dirty = false;
}
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
@@ -1888,7 +1880,7 @@ void kvm_cpu_synchronize_post_reset(CPUState *cpu)
static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
cpu->kvm_vcpu_dirty = false;
cpu->vcpu_dirty = false;
}
void kvm_cpu_synchronize_post_init(CPUState *cpu)
@@ -1896,6 +1888,16 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu)
run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
/*
 * run_on_cpu() work item: flag QEMU's copy of the vCPU registers as dirty.
 *
 * While vcpu_dirty is set, kvm_cpu_exec() writes the registers to KVM with
 * kvm_arch_put_registers() before the next run, and
 * kvm_cpu_synchronize_state() skips re-reading them from KVM.
 * @arg is unused.
 */
static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
cpu->vcpu_dirty = true;
}
/*
 * Mark @cpu's register state as dirty by scheduling
 * do_kvm_cpu_synchronize_pre_loadvm() on it through run_on_cpu().
 * No payload is passed (RUN_ON_CPU_NULL).
 */
void kvm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
run_on_cpu(cpu, do_kvm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
#ifdef KVM_HAVE_MCE_INJECTION
static __thread void *pending_sigbus_addr;
static __thread int pending_sigbus_code;
@@ -1964,13 +1966,14 @@ int kvm_cpu_exec(CPUState *cpu)
}
qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
do {
MemTxAttrs attrs;
if (cpu->kvm_vcpu_dirty) {
if (cpu->vcpu_dirty) {
kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
cpu->kvm_vcpu_dirty = false;
cpu->vcpu_dirty = false;
}
kvm_arch_pre_run(cpu, run);
@@ -2052,7 +2055,7 @@ int kvm_cpu_exec(CPUState *cpu)
break;
case KVM_EXIT_SHUTDOWN:
DPRINTF("shutdown\n");
qemu_system_reset_request();
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
ret = EXCP_INTERRUPT;
break;
case KVM_EXIT_UNKNOWN:
@@ -2066,11 +2069,11 @@ int kvm_cpu_exec(CPUState *cpu)
case KVM_EXIT_SYSTEM_EVENT:
switch (run->system_event.type) {
case KVM_SYSTEM_EVENT_SHUTDOWN:
qemu_system_shutdown_request();
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
ret = EXCP_INTERRUPT;
break;
case KVM_SYSTEM_EVENT_RESET:
qemu_system_reset_request();
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
ret = EXCP_INTERRUPT;
break;
case KVM_SYSTEM_EVENT_CRASH:
@@ -2093,6 +2096,7 @@ int kvm_cpu_exec(CPUState *cpu)
}
} while (ret == 0);
cpu_exec_end(cpu);
qemu_mutex_lock_iothread();
if (ret < 0) {
@@ -2204,8 +2208,8 @@ int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
return kvm_device_ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute) ? 0 : 1;
}
void kvm_device_access(int fd, int group, uint64_t attr,
void *val, bool write)
int kvm_device_access(int fd, int group, uint64_t attr,
void *val, bool write, Error **errp)
{
struct kvm_device_attr kvmattr;
int err;
@@ -2219,11 +2223,12 @@ void kvm_device_access(int fd, int group, uint64_t attr,
write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
&kvmattr);
if (err < 0) {
error_report("KVM_%s_DEVICE_ATTR failed: %s",
write ? "SET" : "GET", strerror(-err));
error_printf("Group %d attr 0x%016" PRIx64 "\n", group, attr);
abort();
error_setg_errno(errp, -err,
"KVM_%s_DEVICE_ATTR failed: Group %d "
"attr 0x%016" PRIx64,
write ? "SET" : "GET", group, attr);
}
return err;
}
/* Return 1 on success, 0 on failure */
@@ -2269,6 +2274,11 @@ int kvm_has_intx_set_mask(void)
return kvm_state->intx_set_mask;
}
/*
 * Report whether the global kvm_state advertises KVM_CAP_ARM_USER_IRQ.
 *
 * The result of kvm_check_extension() is converted to bool, so any
 * non-zero value is reported as "supported".
 */
bool kvm_arm_supports_user_irq(void)
{
return kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ);
}
#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
target_ulong pc)

15
accel/kvm/trace-events Normal file
View File

@@ -0,0 +1,15 @@
# Trace events for debugging and performance instrumentation
# kvm-all.c
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
kvm_irqchip_commit_routes(void) ""
kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
kvm_irqchip_release_virq(int virq) "virq %d"

View File

@@ -0,0 +1,2 @@
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
obj-$(call lnot,$(CONFIG_TCG)) += tcg-stub.o

View File

@@ -155,4 +155,9 @@ void kvm_init_cpu_signals(CPUState *cpu)
{
abort();
}
/*
 * Stub variant: unconditionally reports that the user-IRQ capability is
 * not available.
 * NOTE(review): presumably this lives in the stub built when KVM is
 * disabled -- confirm against the enclosing file.
 */
bool kvm_arm_supports_user_irq(void)
{
return false;
}
#endif

22
accel/stubs/tcg-stub.c Normal file
View File

@@ -0,0 +1,22 @@
/*
* QEMU TCG accelerator stub
*
* Copyright Red Hat, Inc. 2013
*
* Author: Paolo Bonzini <pbonzini@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "tcg/tcg.h"
#include "exec/cpu-common.h"
#include "exec/exec-all.h"
/*
 * Stub for builds without the TCG accelerator: intentionally does nothing,
 * so common code can call tb_flush() unconditionally. @cpu is ignored.
 */
void tb_flush(CPUState *cpu)
{
}

3
accel/tcg/Makefile.objs Normal file
View File

@@ -0,0 +1,3 @@
obj-$(CONFIG_SOFTMMU) += tcg-all.o
obj-$(CONFIG_SOFTMMU) += cputlb.o
obj-y += cpu-exec.o cpu-exec-common.o translate-all.o

View File

@@ -23,6 +23,8 @@
#include "exec/exec-all.h"
#include "exec/memory-internal.h"
bool tcg_allowed;
/* exit the current TB, but without causing any exception to be raised */
void cpu_loop_exit_noexc(CPUState *cpu)
{

View File

@@ -18,7 +18,7 @@
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "trace-root.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg.h"
@@ -280,6 +280,7 @@ struct tb_desc {
CPUArchState *env;
tb_page_addr_t phys_page1;
uint32_t flags;
uint32_t trace_vcpu_dstate;
};
static bool tb_cmp(const void *p, const void *d)
@@ -291,6 +292,7 @@ static bool tb_cmp(const void *p, const void *d)
tb->page_addr[0] == desc->phys_page1 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
!atomic_read(&tb->invalid)) {
/* check next page if needed */
if (tb->page_addr[1] == -1) {
@@ -309,10 +311,8 @@ static bool tb_cmp(const void *p, const void *d)
return false;
}
static TranslationBlock *tb_htable_lookup(CPUState *cpu,
target_ulong pc,
target_ulong cs_base,
uint32_t flags)
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags)
{
tb_page_addr_t phys_pc;
struct tb_desc desc;
@@ -321,10 +321,11 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu,
desc.env = (CPUArchState *)cpu->env_ptr;
desc.cs_base = cs_base;
desc.flags = flags;
desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_hash_func(phys_pc, pc, flags);
h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
}
@@ -344,7 +345,8 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags)) {
tb->flags != flags ||
tb->trace_vcpu_dstate != *cpu->trace_dstate)) {
tb = tb_htable_lookup(cpu, pc, cs_base, flags);
if (!tb) {

View File

@@ -118,7 +118,7 @@ static void tlb_flush_nocheck(CPUState *cpu)
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
cpu_tb_jmp_cache_clear(cpu);
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
@@ -183,7 +183,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
}
}
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
cpu_tb_jmp_cache_clear(cpu);
tlb_debug("done\n");
@@ -746,41 +746,6 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
return ram_addr;
}
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
* is actually a ram_addr_t (in system mode; the user mode emulation
* version of this function returns a guest virtual address).
*/
tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
{
int mmu_idx, page_index, pd;
void *p;
MemoryRegion *mr;
CPUState *cpu = ENV_GET_CPU(env1);
CPUIOTLBEntry *iotlbentry;
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = cpu_mmu_index(env1, true);
if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
(addr & TARGET_PAGE_MASK))) {
cpu_ldub_code(env1, addr);
}
iotlbentry = &env1->iotlb[mmu_idx][page_index];
pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
if (memory_region_is_unassigned(mr)) {
cpu_unassigned_access(cpu, addr, false, true, 0, 4);
/* The CPU's unassigned access hook might have longjumped out
* with an exception. If it didn't (or there was no hook) then
* we can't proceed further.
*/
report_bad_exec(cpu, addr);
exit(1);
}
p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
return qemu_ram_addr_from_host_nofail(p);
}
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
target_ulong addr, uintptr_t retaddr, int size)
{
@@ -868,6 +833,53 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
(ADDR) & TARGET_PAGE_MASK)
/*
 * Resolve the guest code address @addr for @env to the address used to
 * index translated code.
 *
 * NOTE: this function can trigger an exception.
 * NOTE2: the returned address is not exactly the physical address: it
 * is actually a ram_addr_t (in system mode; the user mode emulation
 * version of this function returns a guest virtual address).
 *
 * If the page is backed by an unassigned region and no MMIO pointer can be
 * obtained for it, the access is reported and the process exits.
 */
tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    /*
     * The names env, mmu_idx and index are required: VICTIM_TLB_HIT()
     * expands to a victim_tlb_hit() call that references them implicitly.
     */
    int mmu_idx, index, pd;
    void *p;
    MemoryRegion *mr;
    CPUState *cpu = ENV_GET_CPU(env);
    CPUIOTLBEntry *iotlbentry;

    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env, true);
    if (unlikely(env->tlb_table[mmu_idx][index].addr_code !=
                 (addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK)))) {
        /*
         * The victim TLB must be probed with the same comparator as the
         * main check above, i.e. addr_code.  Probing addr_read could hit
         * on an entry that is readable but not executable and skip the
         * tlb_fill() that is supposed to raise the fetch fault.
         */
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(cpu, addr, MMU_INST_FETCH, mmu_idx, 0);
        }
    }

    iotlbentry = &env->iotlb[mmu_idx][index];
    pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
    mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
    if (memory_region_is_unassigned(mr)) {
        qemu_mutex_lock_iothread();
        if (memory_region_request_mmio_ptr(mr, addr)) {
            qemu_mutex_unlock_iothread();
            /* A MemoryRegion is potentially added so re-run the
             * get_page_addr_code.
             */
            return get_page_addr_code(env, addr);
        }
        qemu_mutex_unlock_iothread();

        cpu_unassigned_access(cpu, addr, false, true, 0, 4);
        /* The CPU's unassigned access hook might have longjumped out
         * with an exception. If it didn't (or there was no hook) then
         * we can't proceed further.
         */
        report_bad_exec(cpu, addr);
        exit(1);
    }

    /* Host pointer for the page, converted back to a ram_addr_t. */
    p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
    return qemu_ram_addr_from_host_nofail(p);
}
/* Probe for whether the specified guest write access is permitted.
* If it is not permitted then an exception will be taken in the same
* way as if this were a real write access (and we will not return).

92
accel/tcg/tcg-all.c Normal file
View File

@@ -0,0 +1,92 @@
/*
* QEMU System Emulator, accelerator interfaces
*
* Copyright (c) 2003-2008 Fabrice Bellard
* Copyright (c) 2014 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "qom/object.h"
#include "qemu-common.h"
#include "qom/cpu.h"
#include "sysemu/cpus.h"
#include "qemu/main-loop.h"
unsigned long tcg_tb_size;
#ifndef CONFIG_USER_ONLY
/*
 * TCG implementation of the cpu_interrupt_handler hook: OR @mask into
 * cpu->interrupt_request and make sure the vCPU notices.
 *
 * mask must never be zero, except for A20 change call.
 * Must be called with the iothread mutex held (asserted below).
 */
static void tcg_handle_interrupt(CPUState *cpu, int mask)
{
    int prev_requests;

    g_assert(qemu_mutex_iothread_locked());

    prev_requests = cpu->interrupt_request;
    cpu->interrupt_request |= mask;

    if (!qemu_cpu_is_self(cpu)) {
        /*
         * Raised from another thread (e.g. the iothread): wake the
         * target cpu in case it's halted.
         */
        qemu_cpu_kick(cpu);
        return;
    }

    /*
     * Raised from the vCPU's own context.
     * NOTE(review): presumably setting the high half of icount_decr makes
     * the running translated code exit promptly -- confirm.
     */
    cpu->icount_decr.u16.high = -1;

    if (!use_icount || cpu->can_do_io) {
        return;
    }

    /* Under icount, a genuinely new interrupt bit outside an I/O op is fatal. */
    if ((mask & ~prev_requests) != 0) {
        cpu_abort(cpu, "Raised interrupt while not in I/O function");
    }
}
#endif
/*
 * init_machine hook of the TCG accelerator class.
 *
 * Initialises the TCG execution engine with tcg_tb_size scaled by
 * 1024 * 1024 (so tcg_tb_size appears to be expressed in MiB -- confirm
 * against where it is set), then installs tcg_handle_interrupt as the
 * global cpu_interrupt_handler.
 *
 * @ms is not used. Always returns 0.
 */
static int tcg_init(MachineState *ms)
{
tcg_exec_init(tcg_tb_size * 1024 * 1024);
cpu_interrupt_handler = tcg_handle_interrupt;
return 0;
}
/*
 * QOM class initialiser for the "tcg" accelerator: sets the class name,
 * hooks tcg_init() as init_machine, and points the class's "allowed" flag
 * at the global tcg_allowed. @data is unused.
 */
static void tcg_accel_class_init(ObjectClass *oc, void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "tcg";
ac->init_machine = tcg_init;
ac->allowed = &tcg_allowed;
}
#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
/* QOM type description of the TCG accelerator, a subclass of TYPE_ACCEL. */
static const TypeInfo tcg_accel_type = {
.name = TYPE_TCG_ACCEL,
.parent = TYPE_ACCEL,
.class_init = tcg_accel_class_init,
};
/* Register the TCG accelerator type; hooked up through type_init(). */
static void register_accel_types(void)
{
type_register_static(&tcg_accel_type);
}
type_init(register_accel_types);

10
accel/tcg/trace-events Normal file
View File

@@ -0,0 +1,10 @@
# Trace events for debugging and performance instrumentation
# TCG related tracing (mostly disabled by default)
# cpu-exec.c
disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
# translate-all.c
translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"

View File

@@ -25,7 +25,7 @@
#include "qemu-common.h"
#define NO_CPU_IO_DEFS
#include "cpu.h"
#include "trace-root.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg.h"
@@ -54,6 +54,7 @@
#include "exec/tb-hash.h"
#include "translate-all.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "exec/log.h"
@@ -112,8 +113,10 @@ typedef struct PageDesc {
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)
uintptr_t qemu_host_page_size;
intptr_t qemu_host_page_mask;
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
sizeof(((TranslationBlock *)0)->trace_vcpu_dstate)
* BITS_PER_BYTE);
/*
* L1 Mapping properties
@@ -363,21 +366,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
return r;
}
void page_size_init(void)
{
/* NOTE: we can always suppose that qemu_host_page_size >=
TARGET_PAGE_SIZE */
qemu_real_host_page_size = getpagesize();
qemu_real_host_page_mask = -(intptr_t)qemu_real_host_page_size;
if (qemu_host_page_size == 0) {
qemu_host_page_size = qemu_real_host_page_size;
}
if (qemu_host_page_size < TARGET_PAGE_SIZE) {
qemu_host_page_size = TARGET_PAGE_SIZE;
}
qemu_host_page_mask = -(intptr_t)qemu_host_page_size;
}
static void page_init(void)
{
page_size_init();
@@ -522,9 +510,7 @@ static inline PageDesc *page_find(tb_page_addr_t index)
#elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
#elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024)
#elif defined(__arm__)
# define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
#elif defined(__s390x__)
/* We have a +- 4GB range on the branches; leave some slop. */
# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
@@ -781,12 +767,13 @@ static inline void code_gen_alloc(size_t tb_size)
exit(1);
}
/* Estimate a good size for the number of TBs we can support. We
still haven't deducted the prologue from the buffer size here,
but that's minimal and won't affect the estimate much. */
tcg_ctx.code_gen_max_blocks
= tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
/* size this conservatively -- realloc later if needed */
tcg_ctx.tb_ctx.tbs_size =
tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8;
if (unlikely(!tcg_ctx.tb_ctx.tbs_size)) {
tcg_ctx.tb_ctx.tbs_size = 64 * 1024;
}
tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx.tb_ctx.tbs_size);
qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
}
@@ -803,6 +790,7 @@ static void tb_htable_init(void)
size. */
void tcg_exec_init(unsigned long tb_size)
{
tcg_allowed = true;
cpu_gen_init();
page_init();
tb_htable_init();
@@ -814,11 +802,6 @@ void tcg_exec_init(unsigned long tb_size)
#endif
}
bool tcg_enabled(void)
{
return tcg_ctx.code_gen_buffer != NULL;
}
/*
* Allocate a new translation block. Flush the translation buffer if
* too many translation blocks or too much generated code.
@@ -828,16 +811,20 @@ bool tcg_enabled(void)
static TranslationBlock *tb_alloc(target_ulong pc)
{
TranslationBlock *tb;
TBContext *ctx;
assert_tb_locked();
if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
tb = tcg_tb_alloc(&tcg_ctx);
if (unlikely(tb == NULL)) {
return NULL;
}
tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
tb->pc = pc;
tb->cflags = 0;
tb->invalid = false;
ctx = &tcg_ctx.tb_ctx;
if (unlikely(ctx->nb_tbs == ctx->tbs_size)) {
ctx->tbs_size *= 2;
ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size);
}
ctx->tbs[ctx->nb_tbs++] = tb;
return tb;
}
@@ -850,8 +837,10 @@ void tb_free(TranslationBlock *tb)
Ignore the hard cases and just back up if this TB happens to
be the last one generated. */
if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
tcg_ctx.code_gen_ptr = tb->tc_ptr;
tb == tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
size_t struct_size = ROUND_UP(sizeof(*tb), qemu_icache_linesize);
tcg_ctx.code_gen_ptr = tb->tc_ptr - struct_size;
tcg_ctx.tb_ctx.nb_tbs--;
}
}
@@ -923,11 +912,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
}
CPU_FOREACH(cpu) {
int i;
for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
atomic_set(&cpu->tb_jmp_cache[i], NULL);
}
cpu_tb_jmp_cache_clear(cpu);
}
tcg_ctx.tb_ctx.nb_tbs = 0;
@@ -1092,7 +1077,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_hash_func(phys_pc, tb->pc, tb->flags);
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);
/* remove the TB from the page list */
@@ -1237,7 +1222,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
}
/* add in the hash table */
h = tb_hash_func(phys_pc, tb->pc, tb->flags);
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
#ifdef DEBUG_TB_CHECK
@@ -1279,9 +1264,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
gen_code_buf = tcg_ctx.code_gen_ptr;
tb->tc_ptr = gen_code_buf;
tb->pc = pc;
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
tb->trace_vcpu_dstate = *cpu->trace_dstate;
tb->invalid = false;
#ifdef CONFIG_PROFILER
tcg_ctx.tb_count1++; /* includes aborted translations because of
@@ -1666,7 +1654,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
while (m_min <= m_max) {
m = (m_min + m_max) >> 1;
tb = &tcg_ctx.tb_ctx.tbs[m];
tb = tcg_ctx.tb_ctx.tbs[m];
v = (uintptr_t)tb->tc_ptr;
if (v == tc_ptr) {
return tb;
@@ -1676,7 +1664,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
m_min = m + 1;
}
}
return &tcg_ctx.tb_ctx.tbs[m_max];
return tcg_ctx.tb_ctx.tbs[m_max];
}
#if !defined(CONFIG_USER_ONLY)
@@ -1806,19 +1794,21 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
cpu_loop_exit_noexc(cpu);
}
/*
 * Reset every tb_jmp_cache slot of @cpu that belongs to the hash page of
 * @page_addr.  Each slot is cleared with atomic_set() rather than a bulk
 * memset.
 */
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
    unsigned int first_slot = tb_jmp_cache_hash_page(page_addr);
    unsigned int offset = 0;

    while (offset < TB_JMP_PAGE_SIZE) {
        atomic_set(&cpu->tb_jmp_cache[first_slot + offset], NULL);
        offset++;
    }
}
void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
{
unsigned int i;
/* Discard jump cache entries for any tb which might potentially
overlap the flushed page. */
i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
memset(&cpu->tb_jmp_cache[i], 0,
TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
i = tb_jmp_cache_hash_page(addr);
memset(&cpu->tb_jmp_cache[i], 0,
TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
tb_jmp_cache_clear_page(cpu, addr);
}
static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
@@ -1874,7 +1864,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
direct_jmp_count = 0;
direct_jmp2_count = 0;
for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
tb = &tcg_ctx.tb_ctx.tbs[i];
tb = tcg_ctx.tb_ctx.tbs[i];
target_code_size += tb->size;
if (tb->size > max_target_code_size) {
max_target_code_size = tb->size;
@@ -1894,8 +1884,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
cpu_fprintf(f, "gen code size %td/%zd\n",
tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
cpu_fprintf(f, "TB count %d/%d\n",
tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
cpu_fprintf(f, "TB count %d\n", tcg_ctx.tb_ctx.nb_tbs);
cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
tcg_ctx.tb_ctx.nb_tbs : 0,
@@ -2219,3 +2208,11 @@ int page_unprotect(target_ulong address, uintptr_t pc)
return 0;
}
#endif /* CONFIG_USER_ONLY */
/*
 * Wrapper for common code that cannot test CONFIG_SOFTMMU itself.
 *
 * When CONFIG_SOFTMMU is defined this flushes the TLB of @cs via
 * tlb_flush(); otherwise it compiles to an empty function.
 */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
tlb_flush(cs);
#endif
}

View File

@@ -27,7 +27,7 @@
#include "sysemu/sysemu.h"
#include "sysemu/arch_init.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
#include "hw/audio/soundhw.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qmp-commands.h"
@@ -85,130 +85,6 @@ int graphic_depth = 32;
const uint32_t arch_type = QEMU_ARCH;
struct soundhw {
const char *name;
const char *descr;
int enabled;
int isa;
union {
int (*init_isa) (ISABus *bus);
int (*init_pci) (PCIBus *bus);
} init;
};
static struct soundhw soundhw[9];
static int soundhw_count;
void isa_register_soundhw(const char *name, const char *descr,
int (*init_isa)(ISABus *bus))
{
assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
soundhw[soundhw_count].name = name;
soundhw[soundhw_count].descr = descr;
soundhw[soundhw_count].isa = 1;
soundhw[soundhw_count].init.init_isa = init_isa;
soundhw_count++;
}
void pci_register_soundhw(const char *name, const char *descr,
int (*init_pci)(PCIBus *bus))
{
assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
soundhw[soundhw_count].name = name;
soundhw[soundhw_count].descr = descr;
soundhw[soundhw_count].isa = 0;
soundhw[soundhw_count].init.init_pci = init_pci;
soundhw_count++;
}
void select_soundhw(const char *optarg)
{
struct soundhw *c;
if (is_help_option(optarg)) {
show_valid_cards:
if (soundhw_count) {
printf("Valid sound card names (comma separated):\n");
for (c = soundhw; c->name; ++c) {
printf ("%-11s %s\n", c->name, c->descr);
}
printf("\n-soundhw all will enable all of the above\n");
} else {
printf("Machine has no user-selectable audio hardware "
"(it may or may not have always-present audio hardware).\n");
}
exit(!is_help_option(optarg));
}
else {
size_t l;
const char *p;
char *e;
int bad_card = 0;
if (!strcmp(optarg, "all")) {
for (c = soundhw; c->name; ++c) {
c->enabled = 1;
}
return;
}
p = optarg;
while (*p) {
e = strchr(p, ',');
l = !e ? strlen(p) : (size_t) (e - p);
for (c = soundhw; c->name; ++c) {
if (!strncmp(c->name, p, l) && !c->name[l]) {
c->enabled = 1;
break;
}
}
if (!c->name) {
if (l > 80) {
error_report("Unknown sound card name (too big to show)");
}
else {
error_report("Unknown sound card name `%.*s'",
(int) l, p);
}
bad_card = 1;
}
p += l + (e != NULL);
}
if (bad_card) {
goto show_valid_cards;
}
}
}
void audio_init(void)
{
struct soundhw *c;
ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);
for (c = soundhw; c->name; ++c) {
if (c->enabled) {
if (c->isa) {
if (!isa_bus) {
error_report("ISA bus not available for %s", c->name);
exit(1);
}
c->init.init_isa(isa_bus);
} else {
if (!pci_bus) {
error_report("PCI bus not available for %s", c->name);
exit(1);
}
c->init.init_pci(pci_bus);
}
}
}
}
int kvm_available(void)
{
#ifdef CONFIG_KVM

View File

@@ -71,6 +71,12 @@ void NAME (void *opaque, struct st_sample *ibuf, struct st_sample *obuf,
while (rate->ipos <= (rate->opos >> 32)) {
ilast = *ibuf++;
rate->ipos++;
/* if ipos overflows, there is an infinite loop */
if (rate->ipos == 0xffffffff) {
rate->ipos = 1;
rate->opos = rate->opos & 0xffffffff;
}
/* See if we finished the input buffer yet */
if (ibuf >= iend) {
goto the_end;

View File

@@ -1,10 +1,6 @@
common-obj-y += rng.o rng-egd.o
common-obj-$(CONFIG_POSIX) += rng-random.o
common-obj-y += msmouse.o wctablet.o testdev.o
common-obj-$(CONFIG_BRLAPI) += baum.o
baum.o-cflags := $(SDL_CFLAGS)
common-obj-$(CONFIG_TPM) += tpm.o
common-obj-y += hostmem.o hostmem-ram.o

View File

@@ -222,7 +222,7 @@ cryptodev_backend_can_be_deleted(UserCreatable *uc, Error **errp)
static void cryptodev_backend_instance_init(Object *obj)
{
object_property_add(obj, "queues", "int",
object_property_add(obj, "queues", "uint32",
cryptodev_backend_get_queues,
cryptodev_backend_set_queues,
NULL, NULL, NULL);

View File

@@ -28,7 +28,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
}
path = object_get_canonical_path_component(OBJECT(backend));
memory_region_init_ram(&backend->mr, OBJECT(backend), path,
memory_region_init_ram_nomigrate(&backend->mr, OBJECT(backend), path,
backend->size, errp);
g_free(path);
}

View File

@@ -12,7 +12,7 @@
#include "qemu/osdep.h"
#include "sysemu/rng.h"
#include "sysemu/char.h"
#include "chardev/char-fe.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
@@ -106,7 +106,7 @@ static void rng_egd_opened(RngBackend *b, Error **errp)
/* FIXME we should resubmit pending requests when the CDS reconnects. */
qemu_chr_fe_set_handlers(&s->chr, rng_egd_chr_can_read,
rng_egd_chr_read, NULL, s, NULL, true);
rng_egd_chr_read, NULL, NULL, s, NULL, true);
}
static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
@@ -145,7 +145,7 @@ static void rng_egd_finalize(Object *obj)
{
RngEgd *s = RNG_EGD(obj);
qemu_chr_fe_deinit(&s->chr);
qemu_chr_fe_deinit(&s->chr, false);
g_free(s->chr_name);
}

View File

@@ -1,10 +0,0 @@
# See docs/tracing.txt for syntax documentation.
# backends/wctablet.c
wct_init(void) ""
wct_cmd_re(void) ""
wct_cmd_st(void) ""
wct_cmd_sp(void) ""
wct_cmd_ts(int input) "0x%02x"
wct_cmd_other(const char *cmd) "%s"
wct_speed(int speed) "%d"

349
block.c
View File

@@ -163,11 +163,16 @@ void path_combine(char *dest, int dest_size,
if (path_is_absolute(filename)) {
pstrcpy(dest, dest_size, filename);
} else {
p = strchr(base_path, ':');
if (p)
p++;
else
p = base_path;
const char *protocol_stripped = NULL;
if (path_has_protocol(base_path)) {
protocol_stripped = strchr(base_path, ':');
if (protocol_stripped) {
protocol_stripped++;
}
}
p = protocol_stripped ?: base_path;
p1 = strrchr(base_path, '/');
#ifdef _WIN32
{
@@ -192,6 +197,41 @@ void path_combine(char *dest, int dest_size,
}
}
/*
 * Helper for bdrv_parse_filename() implementations: strips an optional
 * protocol prefix (especially "file:") from @filename and, if the prefix
 * was present, stores the stripped name under "filename" in @options.
 *
 * When @filename does not start with @prefix, @options is left untouched.
 */
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
                                      QDict *options)
{
    QString *fat_filename;

    if (!strstart(filename, prefix, &filename)) {
        return;
    }

    if (!path_has_protocol(filename)) {
        /* No protocol prefix detected: the shortened filename can be used
         * as-is */
        qdict_put_str(options, "filename", filename);
        return;
    }

    /* Stripping the explicit protocol prefix made a protocol prefix be
     * (wrongly) detected, i.e. the remaining filename contains a colon
     * before the first slash; therefore it cannot be an absolute path */
    assert(!path_is_absolute(filename));

    /* Prefixing "./" fixes the protocol detection issue */
    fat_filename = qstring_from_str("./");
    qstring_append(fat_filename, filename);

    assert(!path_has_protocol(qstring_get_str(fat_filename)));

    qdict_put(options, "filename", fat_filename);
}
/* Returns whether the image file is opened as read-only. Note that this can
* return false and writing to the image file is still not possible because the
* image is inactivated. */
@@ -280,6 +320,8 @@ BlockDriverState *bdrv_new(void)
QLIST_INIT(&bs->op_blockers[i]);
}
notifier_with_return_list_init(&bs->before_write_notifiers);
qemu_co_mutex_init(&bs->reqs_lock);
qemu_mutex_init(&bs->dirty_bitmap_mutex);
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
@@ -1260,7 +1302,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
goto fail_opts;
}
assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
/* bdrv_new() and bdrv_close() make it so */
assert(atomic_read(&bs->copy_on_read) == 0);
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
if (!bs->read_only) {
bdrv_enable_copy_on_read(bs);
@@ -2141,6 +2185,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
ret = -EINVAL;
goto free_exit;
}
bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs));
/* Hook up the backing file link; drop our reference, bs owns the
* backing_hd reference now */
@@ -2529,15 +2574,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto close_and_fail;
}
if (!bdrv_key_required(bs)) {
bdrv_parent_cb_change_media(bs, true);
} else if (!runstate_check(RUN_STATE_PRELAUNCH)
&& !runstate_check(RUN_STATE_INMIGRATE)
&& !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
error_setg(errp,
"Guest must be stopped for opening of encrypted image");
goto close_and_fail;
}
bdrv_parent_cb_change_media(bs, true);
QDECREF(options);
@@ -2945,24 +2982,45 @@ error:
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
BlockDriver *drv;
BlockDriverState *bs;
bool old_can_write, new_can_write;
assert(reopen_state != NULL);
drv = reopen_state->bs->drv;
bs = reopen_state->bs;
drv = bs->drv;
assert(drv != NULL);
old_can_write =
!bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
/* If there are any driver level actions to take */
if (drv->bdrv_reopen_commit) {
drv->bdrv_reopen_commit(reopen_state);
}
/* set BDS specific flags now */
QDECREF(reopen_state->bs->explicit_options);
QDECREF(bs->explicit_options);
reopen_state->bs->explicit_options = reopen_state->explicit_options;
reopen_state->bs->open_flags = reopen_state->flags;
reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
bs->explicit_options = reopen_state->explicit_options;
bs->open_flags = reopen_state->flags;
bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
bdrv_refresh_limits(reopen_state->bs, NULL);
bdrv_refresh_limits(bs, NULL);
new_can_write =
!bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) {
Error *local_err = NULL;
if (drv->bdrv_reopen_bitmaps_rw(bs, &local_err) < 0) {
/* This is not fatal; bitmaps are just left read-only, so all following
* writes will fail. User can remove read-only bitmaps to unblock
* writes.
*/
error_reportf_err(local_err,
"%s: Failed to make dirty bitmaps writable: ",
bdrv_get_node_name(bs));
}
}
}
/*
@@ -2996,9 +3054,6 @@ static void bdrv_close(BlockDriverState *bs)
bdrv_flush(bs);
bdrv_drain(bs); /* in case flush left pending I/O */
bdrv_release_named_dirty_bitmaps(bs);
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
if (bs->drv) {
BdrvChild *child, *next;
@@ -3023,12 +3078,11 @@ static void bdrv_close(BlockDriverState *bs)
g_free(bs->opaque);
bs->opaque = NULL;
bs->copy_on_read = 0;
atomic_set(&bs->copy_on_read, 0);
bs->backing_file[0] = '\0';
bs->backing_format[0] = '\0';
bs->total_sectors = 0;
bs->encrypted = false;
bs->valid_key = false;
bs->sg = false;
QDECREF(bs->options);
QDECREF(bs->explicit_options);
@@ -3037,6 +3091,9 @@ static void bdrv_close(BlockDriverState *bs)
bs->full_open_options = NULL;
}
bdrv_release_named_dirty_bitmaps(bs);
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
g_free(ban);
}
@@ -3354,7 +3411,8 @@ exit:
/**
* Truncate file to 'offset' bytes (needed only for file protocols)
*/
int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
Error **errp)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
@@ -3377,12 +3435,12 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
assert(!(bs->open_flags & BDRV_O_INACTIVE));
ret = drv->bdrv_truncate(bs, offset, errp);
ret = drv->bdrv_truncate(bs, offset, prealloc, errp);
if (ret == 0) {
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
bdrv_dirty_bitmap_truncate(bs);
bdrv_parent_cb_resize(bs);
++bs->write_gen;
atomic_inc(&bs->write_gen);
}
return ret;
}
@@ -3406,6 +3464,41 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
return -ENOTSUP;
}
/*
 * bdrv_measure:
 * @drv: Format driver
 * @opts: Creation options for new image
 * @in_bs: Existing image containing data for new image (may be NULL)
 * @errp: Error object
 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
 * or NULL on error
 *
 * Calculate the file size required to create a new image. The work is
 * delegated to the driver's bdrv_measure callback; a driver without that
 * callback yields an error.
 *
 * If @in_bs is given then space for allocated clusters and zero clusters
 * from that image are included in the calculation. If @opts contains a
 * backing file that is shared by @in_bs then backing clusters may be omitted
 * from the calculation.
 *
 * If @in_bs is NULL then the calculation includes no allocated clusters
 * unless a preallocation option is given in @opts.
 *
 * Note that @in_bs may use a different BlockDriver from @drv.
 *
 * If an error occurs the @errp pointer is set.
 */
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
                               BlockDriverState *in_bs, Error **errp)
{
    if (drv->bdrv_measure) {
        return drv->bdrv_measure(opts, in_bs, errp);
    }

    error_setg(errp, "Block driver '%s' does not support size measurement",
               drv->format_name);
    return NULL;
}
/**
* Return number of sectors on success, -errno on error.
*/
@@ -3458,72 +3551,6 @@ bool bdrv_is_encrypted(BlockDriverState *bs)
return bs->encrypted;
}
/*
 * Returns true if @bs, or the node behind its backing child, is encrypted
 * and does not yet have a valid key.
 */
bool bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_bs = bs->backing ? bs->backing->bs : NULL;
    bool backing_lacks_key =
        backing_bs && backing_bs->encrypted && !backing_bs->valid_key;
    bool self_lacks_key = bs->encrypted && !bs->valid_key;

    return backing_lacks_key || self_lacks_key;
}
/*
 * Set the encryption key of @bs to @key.
 *
 * An encrypted backing node gets the key first (recursively); if that fails
 * the error is returned, and if @bs itself is not encrypted the call then
 * succeeds with 0.
 *
 * Returns -EINVAL if @bs is not encrypted, -ENOMEDIUM if @bs has no driver
 * or the driver has no bdrv_set_key callback, and otherwise the callback's
 * return value. A failing callback clears bs->valid_key; the first
 * successful one sets it and notifies the parents of the media change.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    BlockDriverState *backing_bs = bs->backing ? bs->backing->bs : NULL;
    int rc;

    if (backing_bs && backing_bs->encrypted) {
        rc = bdrv_set_key(backing_bs, key);
        if (rc < 0) {
            return rc;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }

    rc = bs->drv->bdrv_set_key(bs, key);
    if (rc < 0) {
        bs->valid_key = false;
        return rc;
    }

    if (!bs->valid_key) {
        /* call the change callback now, we skipped it on open */
        bs->valid_key = true;
        bdrv_parent_cb_change_media(bs, true);
    }
    return rc;
}
/*
 * Provide an encryption key for @bs.
 *
 * With a null @key:
 *   fail if @bs is encrypted and still lacks a key, otherwise do nothing.
 * With a non-null @key:
 *   fail if @bs is not encrypted or if the key is invalid; otherwise @key
 *   becomes the key of @bs, replacing the existing key, if any.
 *
 * On failure, an error object is stored through @errp if it is non-null.
 */
void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
{
    if (!key) {
        if (bdrv_key_required(bs)) {
            error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
                      "'%s' (%s) is encrypted",
                      bdrv_get_device_or_node_name(bs),
                      bdrv_get_encrypted_filename(bs));
        }
        return;
    }

    if (!bdrv_is_encrypted(bs)) {
        error_setg(errp, "Node '%s' is not encrypted",
                   bdrv_get_device_or_node_name(bs));
        return;
    }

    if (bdrv_set_key(bs, key) < 0) {
        error_setg(errp, QERR_INVALID_PASSWORD);
    }
}
const char *bdrv_get_format_name(BlockDriverState *bs)
{
return bs->drv ? bs->drv->format_name : NULL;
@@ -4091,6 +4118,10 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
}
}
/* At this point persistent bitmaps should be already stored by the format
* driver */
bdrv_release_persistent_dirty_bitmaps(bs);
return 0;
}
@@ -4223,11 +4254,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
if (!QLIST_EMPTY(&bs->op_blockers[op])) {
blocker = QLIST_FIRST(&bs->op_blockers[op]);
if (errp) {
*errp = error_copy(blocker->reason);
error_prepend(errp, "Node '%s' is busy: ",
bdrv_get_device_or_node_name(bs));
}
error_propagate(errp, error_copy(blocker->reason));
error_prepend(errp, "Node '%s' is busy: ",
bdrv_get_device_or_node_name(bs));
return true;
}
return false;
@@ -4367,55 +4396,65 @@ void bdrv_img_create(const char *filename, const char *fmt,
backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
// The size for the image must always be specified, with one exception:
// If we are using a backing file, we can obtain the size from there
/* The size for the image must always be specified, unless we have a backing
* file and we have not been forbidden from opening it. */
size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
if (size == -1) {
if (backing_file) {
BlockDriverState *bs;
char *full_backing = g_new0(char, PATH_MAX);
int64_t size;
int back_flags;
QDict *backing_options = NULL;
if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
BlockDriverState *bs;
char *full_backing = g_new0(char, PATH_MAX);
int back_flags;
QDict *backing_options = NULL;
bdrv_get_full_backing_filename_from_filename(filename, backing_file,
full_backing, PATH_MAX,
&local_err);
if (local_err) {
g_free(full_backing);
goto out;
}
/* backing files always opened read-only */
back_flags = flags;
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
if (backing_fmt) {
backing_options = qdict_new();
qdict_put_str(backing_options, "driver", backing_fmt);
}
bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
&local_err);
bdrv_get_full_backing_filename_from_filename(filename, backing_file,
full_backing, PATH_MAX,
&local_err);
if (local_err) {
g_free(full_backing);
if (!bs) {
goto out;
}
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Could not get size of '%s'",
backing_file);
bdrv_unref(bs);
goto out;
}
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
bdrv_unref(bs);
} else {
error_setg(errp, "Image creation needs a size parameter");
goto out;
}
/* backing files always opened read-only */
back_flags = flags;
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
if (backing_fmt) {
backing_options = qdict_new();
qdict_put_str(backing_options, "driver", backing_fmt);
}
bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
&local_err);
g_free(full_backing);
if (!bs && size != -1) {
/* Couldn't open BS, but we have a size, so it's nonfatal */
warn_reportf_err(local_err,
"Could not verify backing image. "
"This may become an error in future versions.\n");
local_err = NULL;
} else if (!bs) {
/* Couldn't open bs, do not have size */
error_append_hint(&local_err,
"Could not open backing image to determine size.\n");
goto out;
} else {
if (size == -1) {
/* Opened BS, have no size */
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Could not get size of '%s'",
backing_file);
bdrv_unref(bs);
goto out;
}
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
}
bdrv_unref(bs);
}
} /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
if (size == -1) {
error_setg(errp, "Image creation needs a size parameter");
goto out;
}
if (!quiet) {
@@ -4889,3 +4928,25 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
}
/*
 * Ask the driver of @bs whether a new persistent dirty bitmap called @name
 * with the given @granularity can be stored.
 *
 * Returns false and sets @errp with ENOMEDIUM when @bs has no driver, or
 * with ENOTSUP when the driver lacks the bdrv_can_store_new_dirty_bitmap
 * callback; otherwise returns the callback's result.
 */
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
                                     uint32_t granularity, Error **errp)
{
    BlockDriver *drv = bs->drv;
    int err = 0;

    if (!drv) {
        err = ENOMEDIUM;
    } else if (!drv->bdrv_can_store_new_dirty_bitmap) {
        err = ENOTSUP;
    }

    if (err) {
        error_setg_errno(errp, err,
                         "Can't store persistent bitmaps to %s",
                         bdrv_get_device_or_node_name(bs));
        return false;
    }

    return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
}

View File

@@ -1,6 +1,6 @@
block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
block-obj-y += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += quorum.o

View File

@@ -32,23 +32,28 @@
static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
void block_acct_init(BlockAcctStats *stats, bool account_invalid,
bool account_failed)
void block_acct_init(BlockAcctStats *stats)
{
stats->account_invalid = account_invalid;
stats->account_failed = account_failed;
qemu_mutex_init(&stats->lock);
if (qtest_enabled()) {
clock_type = QEMU_CLOCK_VIRTUAL;
}
}
/*
 * Store the two accounting policy flags in @stats.
 *
 * @account_invalid and @account_failed are copied verbatim into the fields
 * of the same name; nothing else in @stats is touched.
 */
void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
bool account_failed)
{
stats->account_invalid = account_invalid;
stats->account_failed = account_failed;
}
/*
 * Tear down @stats: free every entry on the stats->intervals list, then
 * destroy stats->lock.
 */
void block_acct_cleanup(BlockAcctStats *stats)
{
    BlockAcctTimedStats *interval, *following;

    /* _SAFE iteration: the current entry is freed inside the loop body */
    QSLIST_FOREACH_SAFE(interval, &stats->intervals, entries, following) {
        g_free(interval);
    }

    qemu_mutex_destroy(&stats->lock);
}
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
@@ -58,12 +63,15 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
s = g_new0(BlockAcctTimedStats, 1);
s->interval_length = interval_length;
s->stats = stats;
qemu_mutex_lock(&stats->lock);
QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
timed_average_init(&s->latency[i], clock_type,
(uint64_t) interval_length * NANOSECONDS_PER_SECOND);
}
qemu_mutex_unlock(&stats->lock);
}
BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
@@ -86,7 +94,8 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
cookie->type = type;
}
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
bool failed)
{
BlockAcctTimedStats *s;
int64_t time_ns = qemu_clock_get_ns(clock_type);
@@ -98,31 +107,16 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
assert(cookie->type < BLOCK_MAX_IOTYPE);
stats->nr_bytes[cookie->type] += cookie->bytes;
stats->nr_ops[cookie->type]++;
stats->total_time_ns[cookie->type] += latency_ns;
stats->last_access_time_ns = time_ns;
qemu_mutex_lock(&stats->lock);
QSLIST_FOREACH(s, &stats->intervals, entries) {
timed_average_account(&s->latency[cookie->type], latency_ns);
if (failed) {
stats->failed_ops[cookie->type]++;
} else {
stats->nr_bytes[cookie->type] += cookie->bytes;
stats->nr_ops[cookie->type]++;
}
}
void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
assert(cookie->type < BLOCK_MAX_IOTYPE);
stats->failed_ops[cookie->type]++;
if (stats->account_failed) {
BlockAcctTimedStats *s;
int64_t time_ns = qemu_clock_get_ns(clock_type);
int64_t latency_ns = time_ns - cookie->start_time_ns;
if (qtest_enabled()) {
latency_ns = qtest_latency_ns;
}
if (!failed || stats->account_failed) {
stats->total_time_ns[cookie->type] += latency_ns;
stats->last_access_time_ns = time_ns;
@@ -130,29 +124,45 @@ void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
timed_average_account(&s->latency[cookie->type], latency_ns);
}
}
qemu_mutex_unlock(&stats->lock);
}
/*
 * Account the request described by @cookie in @stats.
 * Thin wrapper: calls block_account_one_io() with failed == false.
 */
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
block_account_one_io(stats, cookie, false);
}
/*
 * Account the request described by @cookie in @stats as a failed one.
 * Thin wrapper: calls block_account_one_io() with failed == true.
 */
void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
block_account_one_io(stats, cookie, true);
}
/*
 * Account an invalid request of the given @type in @stats.
 *
 * Increments invalid_ops[type] and, if stats->account_invalid is set,
 * updates last_access_time_ns to the current clock value. Both updates are
 * made while holding stats->lock.
 */
void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
{
assert(type < BLOCK_MAX_IOTYPE);
/* block_account_one_io() (used by block_acct_done() and
 * block_acct_failed()) updates total_time_ns[], but this one does
 * not. The reason is that invalid requests are accounted during their
 * submission, therefore there's no actual I/O involved.
 */
qemu_mutex_lock(&stats->lock);
stats->invalid_ops[type]++;
if (stats->account_invalid) {
stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
}
qemu_mutex_unlock(&stats->lock);
}
/*
 * Add @num_requests to the merged[] counter of request type @type.
 * The counter is updated while holding stats->lock.
 */
void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
int num_requests)
{
assert(type < BLOCK_MAX_IOTYPE);
qemu_mutex_lock(&stats->lock);
stats->merged[type] += num_requests;
qemu_mutex_unlock(&stats->lock);
}
int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
@@ -167,7 +177,9 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats,
assert(type < BLOCK_MAX_IOTYPE);
qemu_mutex_lock(&stats->stats->lock);
sum = timed_average_sum(&stats->latency[type], &elapsed);
qemu_mutex_unlock(&stats->stats->lock);
return (double) sum / elapsed;
}

View File

@@ -39,7 +39,7 @@ typedef struct BackupBlockJob {
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
uint64_t bytes_read;
unsigned long *done_bitmap;
int64_t cluster_size;
bool compress;
@@ -47,12 +47,6 @@ typedef struct BackupBlockJob {
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
/* Size of a cluster in sectors, instead of bytes: job->cluster_size divided
 * by BDRV_SECTOR_SIZE (integer division). */
static inline int64_t cluster_size_sectors(BackupBlockJob *job)
{
return job->cluster_size / BDRV_SECTOR_SIZE;
}
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
@@ -64,7 +58,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
do {
retry = false;
QLIST_FOREACH(req, &job->inflight_reqs, list) {
if (end > req->start && start < req->end) {
if (end > req->start_byte && start < req->end_byte) {
qemu_co_queue_wait(&req->wait_queue, NULL);
retry = true;
break;
@@ -75,10 +69,10 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
/* Keep track of an in-flight request */
static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
int64_t start, int64_t end)
int64_t start, int64_t end)
{
req->start = start;
req->end = end;
req->start_byte = start;
req->end_byte = end;
qemu_co_queue_init(&req->wait_queue);
QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
}
@@ -91,7 +85,7 @@ static void cow_request_end(CowRequest *req)
}
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
int64_t sector_num, int nb_sectors,
int64_t offset, uint64_t bytes,
bool *error_is_read,
bool is_write_notifier)
{
@@ -101,41 +95,37 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
int64_t sectors_per_cluster = cluster_size_sectors(job);
int64_t start, end;
int n;
int64_t start, end; /* bytes */
int n; /* bytes */
qemu_co_rwlock_rdlock(&job->flush_rwlock);
start = sector_num / sectors_per_cluster;
end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);
trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
trace_backup_do_cow_enter(job, start, offset, bytes);
wait_for_overlapping_requests(job, start, end);
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
if (test_bit(start, job->done_bitmap)) {
for (; start < end; start += job->cluster_size) {
if (test_bit(start / job->cluster_size, job->done_bitmap)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
trace_backup_do_cow_process(job, start);
n = MIN(sectors_per_cluster,
job->common.len / BDRV_SECTOR_SIZE -
start * sectors_per_cluster);
n = MIN(job->cluster_size, job->common.len - start);
if (!bounce_buffer) {
bounce_buffer = blk_blockalign(blk, job->cluster_size);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
iov.iov_len = n;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = blk_co_preadv(blk, start * job->cluster_size,
bounce_qiov.size, &bounce_qiov,
ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
@@ -146,10 +136,10 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
}
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size,
ret = blk_co_pwrite_zeroes(job->target, start,
bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
} else {
ret = blk_co_pwritev(job->target, start * job->cluster_size,
ret = blk_co_pwritev(job->target, start,
bounce_qiov.size, &bounce_qiov,
job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
}
@@ -161,13 +151,13 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
goto out;
}
set_bit(start, job->done_bitmap);
set_bit(start / job->cluster_size, job->done_bitmap);
/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
*/
job->sectors_read += n;
job->common.offset += n * BDRV_SECTOR_SIZE;
job->bytes_read += n;
job->common.offset += n;
}
out:
@@ -177,7 +167,7 @@ out:
cow_request_end(&cow_request);
trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
trace_backup_do_cow_return(job, offset, bytes, ret);
qemu_co_rwlock_unlock(&job->flush_rwlock);
@@ -190,14 +180,12 @@ static int coroutine_fn backup_before_write_notify(
{
BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
BdrvTrackedRequest *req = opaque;
int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
assert(req->bs == blk_bs(job->common.blk));
assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));
return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
return backup_do_cow(job, req->offset, req->bytes, NULL, true);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -208,7 +196,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -275,32 +263,29 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
bitmap_zero(backup_job->done_bitmap, len);
}
void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
int nb_sectors)
void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
uint64_t bytes)
{
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
int64_t start, end;
assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
start = sector_num / sectors_per_cluster;
end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
wait_for_overlapping_requests(backup_job, start, end);
}
void backup_cow_request_begin(CowRequest *req, BlockJob *job,
int64_t sector_num,
int nb_sectors)
int64_t offset, uint64_t bytes)
{
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
int64_t start, end;
assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
start = sector_num / sectors_per_cluster;
end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
cow_request_begin(req, backup_job, start, end);
}
@@ -359,8 +344,8 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
*/
if (job->common.speed) {
uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
job->sectors_read);
job->sectors_read = 0;
job->bytes_read);
job->bytes_read = 0;
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
} else {
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
@@ -379,11 +364,10 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
int ret = 0;
int clusters_per_iter;
uint32_t granularity;
int64_t sector;
int64_t offset;
int64_t cluster;
int64_t end;
int64_t last_cluster = -1;
int64_t sectors_per_cluster = cluster_size_sectors(job);
BdrvDirtyBitmapIter *dbi;
granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
@@ -391,8 +375,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);
/* Find the next dirty sector(s) */
while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
cluster = sector / sectors_per_cluster;
while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
cluster = offset / job->cluster_size;
/* Fake progress updates for any clusters we skipped */
if (cluster != last_cluster + 1) {
@@ -405,8 +389,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
if (yield_and_check(job)) {
goto out;
}
ret = backup_do_cow(job, cluster * sectors_per_cluster,
sectors_per_cluster, &error_is_read,
ret = backup_do_cow(job, cluster * job->cluster_size,
job->cluster_size, &error_is_read,
false);
if ((ret < 0) &&
backup_error_action(job, error_is_read, -ret) ==
@@ -419,7 +403,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* If the bitmap granularity is smaller than the backup granularity,
* we need to advance the iterator pointer to the next cluster. */
if (granularity < job->cluster_size) {
bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster);
bdrv_set_dirty_iter(dbi,
cluster * job->cluster_size / BDRV_SECTOR_SIZE);
}
last_cluster = cluster - 1;
@@ -441,17 +426,14 @@ static void coroutine_fn backup_run(void *opaque)
BackupBlockJob *job = opaque;
BackupCompleteData *data;
BlockDriverState *bs = blk_bs(job->common.blk);
int64_t start, end;
int64_t sectors_per_cluster = cluster_size_sectors(job);
int64_t offset;
int ret = 0;
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
start = 0;
end = DIV_ROUND_UP(job->common.len, job->cluster_size);
job->done_bitmap = bitmap_new(end);
job->done_bitmap = bitmap_new(DIV_ROUND_UP(job->common.len,
job->cluster_size));
job->before_write.notify = backup_before_write_notify;
bdrv_add_before_write_notifier(bs, &job->before_write);
@@ -466,7 +448,8 @@ static void coroutine_fn backup_run(void *opaque)
ret = backup_run_incremental(job);
} else {
/* Both FULL and TOP SYNC_MODE's require copying.. */
for (; start < end; start++) {
for (offset = 0; offset < job->common.len;
offset += job->cluster_size) {
bool error_is_read;
int alloced = 0;
@@ -475,12 +458,13 @@ static void coroutine_fn backup_run(void *opaque)
}
if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
int i, n;
int i;
int64_t n;
/* Check to see if these blocks are already in the
* backing file. */
for (i = 0; i < sectors_per_cluster;) {
for (i = 0; i < job->cluster_size;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are all in the same state.
@@ -488,9 +472,8 @@ static void coroutine_fn backup_run(void *opaque)
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
start * sectors_per_cluster + i,
sectors_per_cluster - i, &n);
bdrv_is_allocated(bs, offset + i,
job->cluster_size - i, &n);
i += n;
if (alloced || n == 0) {
@@ -508,9 +491,8 @@ static void coroutine_fn backup_run(void *opaque)
if (alloced < 0) {
ret = alloced;
} else {
ret = backup_do_cow(job, start * sectors_per_cluster,
sectors_per_cluster, &error_is_read,
false);
ret = backup_do_cow(job, offset, job->cluster_size,
&error_is_read, false);
}
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
@@ -519,7 +501,7 @@ static void coroutine_fn backup_run(void *opaque)
if (action == BLOCK_ERROR_ACTION_REPORT) {
break;
} else {
start--;
offset -= job->cluster_size;
continue;
}
}
@@ -657,12 +639,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
ret = bdrv_get_info(target, &bdi);
if (ret == -ENOTSUP && !target->backing) {
/* Cluster size is not defined */
error_report("WARNING: The target block device doesn't provide "
"information about the block size and it doesn't have a "
"backing file. The default block size of %u bytes is "
"used. If the actual block size of the target exceeds "
"this default, the backup may be unusable",
BACKUP_CLUSTER_SIZE_DEFAULT);
warn_report("The target block device doesn't provide "
"information about the block size and it doesn't have a "
"backing file. The default block size of %u bytes is "
"used. If the actual block size of the target exceeds "
"this default, the backup may be unusable",
BACKUP_CLUSTER_SIZE_DEFAULT);
job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
} else if (ret < 0 && !target->backing) {
error_setg_errno(errp, -ret,
@@ -692,7 +674,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
}
if (job) {
backup_clean(&job->common);
block_job_unref(&job->common);
block_job_early_fail(&job->common);
}
return NULL;

View File

@@ -31,7 +31,6 @@
#include "qemu/module.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
#include "sysemu/qtest.h"
@@ -576,7 +575,7 @@ static int blkdebug_co_flush(BlockDriverState *bs)
}
static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count,
int64_t offset, int bytes,
BdrvRequestFlags flags)
{
uint32_t align = MAX(bs->bl.request_alignment,
@@ -587,29 +586,29 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
* preferred alignment (so that we test the fallback to writes on
* unaligned portions), and check that the block layer never hands
* us anything unaligned that crosses an alignment boundary. */
if (count < align) {
if (bytes < align) {
assert(QEMU_IS_ALIGNED(offset, align) ||
QEMU_IS_ALIGNED(offset + count, align) ||
QEMU_IS_ALIGNED(offset + bytes, align) ||
DIV_ROUND_UP(offset, align) ==
DIV_ROUND_UP(offset + count, align));
DIV_ROUND_UP(offset + bytes, align));
return -ENOTSUP;
}
assert(QEMU_IS_ALIGNED(offset, align));
assert(QEMU_IS_ALIGNED(count, align));
assert(QEMU_IS_ALIGNED(bytes, align));
if (bs->bl.max_pwrite_zeroes) {
assert(count <= bs->bl.max_pwrite_zeroes);
assert(bytes <= bs->bl.max_pwrite_zeroes);
}
err = rule_check(bs, offset, count);
err = rule_check(bs, offset, bytes);
if (err) {
return err;
}
return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
int64_t offset, int bytes)
{
uint32_t align = bs->bl.pdiscard_alignment;
int err;
@@ -617,29 +616,39 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
/* Only pass through requests that are larger than requested
* minimum alignment, and ensure that unaligned requests do not
* cross optimum discard boundaries. */
if (count < bs->bl.request_alignment) {
if (bytes < bs->bl.request_alignment) {
assert(QEMU_IS_ALIGNED(offset, align) ||
QEMU_IS_ALIGNED(offset + count, align) ||
QEMU_IS_ALIGNED(offset + bytes, align) ||
DIV_ROUND_UP(offset, align) ==
DIV_ROUND_UP(offset + count, align));
DIV_ROUND_UP(offset + bytes, align));
return -ENOTSUP;
}
assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
if (align && count >= align) {
assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
if (align && bytes >= align) {
assert(QEMU_IS_ALIGNED(offset, align));
assert(QEMU_IS_ALIGNED(count, align));
assert(QEMU_IS_ALIGNED(bytes, align));
}
if (bs->bl.max_pdiscard) {
assert(count <= bs->bl.max_pdiscard);
assert(bytes <= bs->bl.max_pdiscard);
}
err = rule_check(bs, offset, count);
err = rule_check(bs, offset, bytes);
if (err) {
return err;
}
return bdrv_co_pdiscard(bs->file->bs, offset, count);
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
}
/*
 * Block-status callback for blkdebug.
 *
 * Reports the whole queried range in one go: *pnum is set to nb_sectors,
 * *file is pointed at the child node (bs->file->bs), and the returned
 * status carries BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID together with
 * the byte offset of sector_num (sector_num << BDRV_SECTOR_BITS).
 */
static int64_t coroutine_fn blkdebug_co_get_block_status(
    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
    BlockDriverState **file)
{
    int64_t status = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;

    /* The offset reported is the same position the caller asked about. */
    status |= sector_num << BDRV_SECTOR_BITS;

    *file = bs->file->bs;
    *pnum = nb_sectors;

    return status;
}
static void blkdebug_close(BlockDriverState *bs)
@@ -812,9 +821,10 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
return bdrv_getlength(bs->file->bs);
}
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
return bdrv_truncate(bs->file, offset, errp);
return bdrv_truncate(bs->file, offset, prealloc, errp);
}
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -840,9 +850,13 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
}
if (!force_json && bs->file->bs->exact_filename[0]) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkdebug:%s:%s", s->config_file ?: "",
bs->file->bs->exact_filename);
int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkdebug:%s:%s", s->config_file ?: "",
bs->file->bs->exact_filename);
if (ret >= sizeof(bs->exact_filename)) {
/* An overflow makes the filename unusable, so do not report any */
bs->exact_filename[0] = 0;
}
}
opts = qdict_new();
@@ -912,6 +926,7 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_co_flush_to_disk = blkdebug_co_flush,
.bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes,
.bdrv_co_pdiscard = blkdebug_co_pdiscard,
.bdrv_co_get_block_status = blkdebug_co_get_block_status,
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,

View File

@@ -96,10 +96,10 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
}
static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
int64_t offset, int bytes, BdrvRequestFlags flags)
{
uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -107,10 +107,10 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
}
static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
int64_t offset, int bytes)
{
uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
int ret = bdrv_co_pdiscard(bs->file->bs, offset, bytes);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();

View File

@@ -301,10 +301,14 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
if (bs->file->bs->exact_filename[0]
&& s->test_file->bs->exact_filename[0])
{
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkverify:%s:%s",
bs->file->bs->exact_filename,
s->test_file->bs->exact_filename);
int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkverify:%s:%s",
bs->file->bs->exact_filename,
s->test_file->bs->exact_filename);
if (ret >= sizeof(bs->exact_filename)) {
/* An overflow makes the filename unusable, so do not report any */
bs->exact_filename[0] = 0;
}
}
}

View File

@@ -83,7 +83,6 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -168,7 +167,7 @@ static int blk_root_inactivate(BdrvChild *child)
* this point because the VM is stopped) and unattached monitor-owned
* BlockBackends. If there is still any other user like a block job, then
* we simply can't inactivate the image. */
if (!blk->dev && !blk->name[0]) {
if (!blk->dev && !blk_name(blk)[0]) {
return -EPERM;
}
@@ -216,8 +215,10 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
qemu_co_queue_init(&blk->public.throttled_reqs[1]);
block_acct_init(&blk->stats);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
@@ -341,7 +342,7 @@ void blk_unref(BlockBackend *blk)
* Behaves similarly to blk_next() but iterates over all BlockBackends, even the
* ones which are hidden (i.e. are not referenced by the monitor).
*/
static BlockBackend *blk_all_next(BlockBackend *blk)
BlockBackend *blk_all_next(BlockBackend *blk)
{
return blk ? QTAILQ_NEXT(blk, link)
: QTAILQ_FIRST(&block_backends);
@@ -724,7 +725,7 @@ void *blk_get_attached_dev(BlockBackend *blk)
/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
* device attached to the BlockBackend. */
static char *blk_get_attached_dev_id(BlockBackend *blk)
char *blk_get_attached_dev_id(BlockBackend *blk)
{
DeviceState *dev;
@@ -1097,9 +1098,9 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
}
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int count, BdrvRequestFlags flags)
int bytes, BdrvRequestFlags flags)
{
return blk_prw(blk, offset, NULL, count, blk_write_entry,
return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
flags | BDRV_REQ_ZERO_WRITE);
}
@@ -1309,10 +1310,10 @@ static void blk_aio_pdiscard_entry(void *opaque)
}
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int count,
int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
{
return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
cb, opaque);
}
@@ -1372,14 +1373,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
}
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
int ret = blk_check_byte_request(blk, offset, count);
int ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
return bdrv_co_pdiscard(blk_bs(blk), offset, count);
return bdrv_co_pdiscard(blk_bs(blk), offset, bytes);
}
int blk_co_flush(BlockBackend *blk)
@@ -1758,9 +1759,9 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
}
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int count, BdrvRequestFlags flags)
int bytes, BdrvRequestFlags flags)
{
return blk_co_pwritev(blk, offset, count, NULL,
return blk_co_pwritev(blk, offset, bytes, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
@@ -1771,14 +1772,15 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
BDRV_REQ_WRITE_COMPRESSED);
}
int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
Error **errp)
{
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
return bdrv_truncate(blk->root, offset, errp);
return bdrv_truncate(blk->root, offset, prealloc, errp);
}
static void blk_pdiscard_entry(void *opaque)
@@ -1787,9 +1789,9 @@ static void blk_pdiscard_entry(void *opaque)
rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
}
int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
@@ -1953,7 +1955,7 @@ static void blk_root_drained_begin(BdrvChild *child)
/* Note that blk->root may not be accessible here yet if we are just
* attaching to a BlockDriverState that is drained. Use child instead. */
if (blk->public.io_limits_disabled++ == 0) {
if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
throttle_group_restart_blk(blk);
}
}
@@ -1964,7 +1966,7 @@ static void blk_root_drained_end(BdrvChild *child)
assert(blk->quiesce_counter);
assert(blk->public.io_limits_disabled);
--blk->public.io_limits_disabled;
atomic_dec(&blk->public.io_limits_disabled);
if (--blk->quiesce_counter == 0) {
if (blk->dev_ops && blk->dev_ops->drained_end) {

View File

@@ -47,26 +47,25 @@ typedef struct CommitBlockJob {
} CommitBlockJob;
static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
int64_t sector_num, int nb_sectors,
int64_t offset, uint64_t bytes,
void *buf)
{
int ret = 0;
QEMUIOVector qiov;
struct iovec iov = {
.iov_base = buf,
.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
.iov_len = bytes,
};
assert(bytes < SIZE_MAX);
qemu_iovec_init_external(&qiov, &iov, 1);
ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
qiov.size, &qiov, 0);
ret = blk_co_preadv(bs, offset, qiov.size, &qiov, 0);
if (ret < 0) {
return ret;
}
ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
qiov.size, &qiov, 0);
ret = blk_co_pwritev(base, offset, qiov.size, &qiov, 0);
if (ret < 0) {
return ret;
}
@@ -89,6 +88,12 @@ static void commit_complete(BlockJob *job, void *opaque)
int ret = data->ret;
bool remove_commit_top_bs = false;
/* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
bdrv_ref(top);
if (overlay_bs) {
bdrv_ref(overlay_bs);
}
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
blk_unref(s->base);
@@ -115,6 +120,13 @@ static void commit_complete(BlockJob *job, void *opaque)
}
g_free(s->backing_file_str);
blk_unref(s->top);
/* If there is more than one reference to the job (e.g. if called from
* block_job_finish_sync()), block_job_completed() won't free it and
* therefore the blockers on the intermediate nodes remain. This would
* cause bdrv_set_backing_hd() to fail. */
block_job_remove_all_bdrv(job);
block_job_completed(&s->common, ret);
g_free(data);
@@ -124,23 +136,25 @@ static void commit_complete(BlockJob *job, void *opaque)
if (remove_commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
bdrv_unref(overlay_bs);
bdrv_unref(top);
}
static void coroutine_fn commit_run(void *opaque)
{
CommitBlockJob *s = opaque;
CommitCompleteData *data;
int64_t sector_num, end;
int64_t offset;
uint64_t delay_ns = 0;
int ret = 0;
int n = 0;
int64_t n = 0; /* bytes */
void *buf = NULL;
int bytes_written = 0;
int64_t base_len;
ret = s->common.len = blk_getlength(s->top);
if (s->common.len < 0) {
goto out;
}
@@ -151,16 +165,15 @@ static void coroutine_fn commit_run(void *opaque)
}
if (base_len < s->common.len) {
ret = blk_truncate(s->base, s->common.len, NULL);
ret = blk_truncate(s->base, s->common.len, PREALLOC_MODE_OFF, NULL);
if (ret) {
goto out;
}
}
end = s->common.len >> BDRV_SECTOR_BITS;
buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
for (sector_num = 0; sector_num < end; sector_num += n) {
for (offset = 0; offset < s->common.len; offset += n) {
bool copy;
/* Note that even when no rate limit is applied we need to yield
@@ -172,14 +185,12 @@ static void coroutine_fn commit_run(void *opaque)
}
/* Copy if allocated above the base */
ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
offset, COMMIT_BUFFER_SIZE, &n);
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
trace_commit_one_iteration(s, offset, n, ret);
if (copy) {
ret = commit_populate(s->top, s->base, sector_num, n, buf);
bytes_written += n * BDRV_SECTOR_SIZE;
ret = commit_populate(s->top, s->base, offset, n, buf);
bytes_written += n;
}
if (ret < 0) {
BlockErrorAction action =
@@ -192,7 +203,7 @@ static void coroutine_fn commit_run(void *opaque)
}
}
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
s->common.offset += n;
if (copy && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
@@ -217,7 +228,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static const BlockJobDriver commit_job_driver = {
@@ -239,7 +250,7 @@ static int64_t coroutine_fn bdrv_commit_top_get_block_status(
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
@@ -426,11 +437,11 @@ fail:
if (commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
block_job_unref(&s->common);
block_job_early_fail(&s->common);
}
#define COMMIT_BUF_SECTORS 2048
#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)
/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
@@ -439,8 +450,9 @@ int bdrv_commit(BlockDriverState *bs)
BlockDriverState *backing_file_bs = NULL;
BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int64_t offset, length, backing_length;
int ro, open_flags;
int64_t n;
int ret = 0;
uint8_t *buf = NULL;
Error *local_err = NULL;
@@ -511,37 +523,33 @@ int bdrv_commit(BlockDriverState *bs)
* grow the backing file image if possible. If not possible,
* we must return an error */
if (length > backing_length) {
ret = blk_truncate(backing, length, &local_err);
ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto ro_cleanup;
}
}
total_sectors = length >> BDRV_SECTOR_BITS;
/* blk_try_blockalign() for src will choose an alignment that works for
* backing as well, so no need to compare the alignment manually. */
buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
if (buf == NULL) {
ret = -ENOMEM;
goto ro_cleanup;
}
for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
for (offset = 0; offset < length; offset += n) {
ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
if (ret < 0) {
goto ro_cleanup;
}
if (ret) {
ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE);
ret = blk_pread(src, offset, buf, n);
if (ret < 0) {
goto ro_cleanup;
}
ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE, 0);
ret = blk_pwrite(backing, offset, buf, n, 0);
if (ret < 0) {
goto ro_cleanup;
}

View File

@@ -24,16 +24,10 @@
#include "sysemu/block-backend.h"
#include "crypto/block.h"
#include "qapi/opts-visitor.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi-visit.h"
#include "qapi/error.h"
#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
#include "block/crypto.h"
typedef struct BlockCrypto BlockCrypto;
@@ -135,11 +129,7 @@ static QemuOptsList block_crypto_runtime_opts_luks = {
.name = "crypto",
.head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
.desc = {
{
.name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
.type = QEMU_OPT_STRING,
.help = "ID of the secret that provides the encryption key",
},
BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
{ /* end of list */ }
},
};
@@ -154,49 +144,21 @@ static QemuOptsList block_crypto_create_opts_luks = {
.type = QEMU_OPT_SIZE,
.help = "Virtual disk size"
},
{
.name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
.type = QEMU_OPT_STRING,
.help = "ID of the secret that provides the encryption key",
},
{
.name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
.type = QEMU_OPT_STRING,
.help = "Name of encryption cipher algorithm",
},
{
.name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
.type = QEMU_OPT_STRING,
.help = "Name of encryption cipher mode",
},
{
.name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
.type = QEMU_OPT_STRING,
.help = "Name of IV generator algorithm",
},
{
.name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
.type = QEMU_OPT_STRING,
.help = "Name of IV generator hash algorithm",
},
{
.name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
.type = QEMU_OPT_STRING,
.help = "Name of encryption hash algorithm",
},
{
.name = BLOCK_CRYPTO_OPT_LUKS_ITER_TIME,
.type = QEMU_OPT_NUMBER,
.help = "Time to spend in PBKDF in milliseconds",
},
BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(""),
BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(""),
BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(""),
BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""),
BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""),
BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""),
{ /* end of list */ }
},
};
static QCryptoBlockOpenOptions *
QCryptoBlockOpenOptions *
block_crypto_open_opts_init(QCryptoBlockFormat format,
QemuOpts *opts,
QDict *opts,
Error **errp)
{
Visitor *v;
@@ -206,7 +168,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
ret = g_new0(QCryptoBlockOpenOptions, 1);
ret->format = format;
v = opts_visitor_new(opts);
v = qobject_input_visitor_new_keyval(QOBJECT(opts));
visit_start_struct(v, NULL, NULL, 0, &local_err);
if (local_err) {
@@ -219,6 +181,11 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
v, &ret->u.luks, &local_err);
break;
case Q_CRYPTO_BLOCK_FORMAT_QCOW:
visit_type_QCryptoBlockOptionsQCow_members(
v, &ret->u.qcow, &local_err);
break;
default:
error_setg(&local_err, "Unsupported block format %d", format);
break;
@@ -240,9 +207,9 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
}
static QCryptoBlockCreateOptions *
QCryptoBlockCreateOptions *
block_crypto_create_opts_init(QCryptoBlockFormat format,
QemuOpts *opts,
QDict *opts,
Error **errp)
{
Visitor *v;
@@ -252,7 +219,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
ret = g_new0(QCryptoBlockCreateOptions, 1);
ret->format = format;
v = opts_visitor_new(opts);
v = qobject_input_visitor_new_keyval(QOBJECT(opts));
visit_start_struct(v, NULL, NULL, 0, &local_err);
if (local_err) {
@@ -265,6 +232,11 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
v, &ret->u.luks, &local_err);
break;
case Q_CRYPTO_BLOCK_FORMAT_QCOW:
visit_type_QCryptoBlockOptionsQCow_members(
v, &ret->u.qcow, &local_err);
break;
default:
error_setg(&local_err, "Unsupported block format %d", format);
break;
@@ -299,6 +271,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
int ret = -EINVAL;
QCryptoBlockOpenOptions *open_opts = NULL;
unsigned int cflags = 0;
QDict *cryptoopts = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
@@ -313,7 +286,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
goto cleanup;
}
open_opts = block_crypto_open_opts_init(format, opts, errp);
cryptoopts = qemu_opts_to_qdict(opts, NULL);
open_opts = block_crypto_open_opts_init(format, cryptoopts, errp);
if (!open_opts) {
goto cleanup;
}
@@ -321,7 +296,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
if (flags & BDRV_O_NO_IO) {
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
}
crypto->block = qcrypto_block_open(open_opts,
crypto->block = qcrypto_block_open(open_opts, NULL,
block_crypto_read_func,
bs,
cflags,
@@ -333,10 +308,10 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
}
bs->encrypted = true;
bs->valid_key = true;
ret = 0;
cleanup:
QDECREF(cryptoopts);
qapi_free_QCryptoBlockOpenOptions(open_opts);
return ret;
}
@@ -356,13 +331,16 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
.opts = opts,
.filename = filename,
};
QDict *cryptoopts;
create_opts = block_crypto_create_opts_init(format, opts, errp);
cryptoopts = qemu_opts_to_qdict(opts, NULL);
create_opts = block_crypto_create_opts_init(format, cryptoopts, errp);
if (!create_opts) {
return -1;
}
crypto = qcrypto_block_create(create_opts,
crypto = qcrypto_block_create(create_opts, NULL,
block_crypto_init_func,
block_crypto_write_func,
&data,
@@ -375,6 +353,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
ret = 0;
cleanup:
QDECREF(cryptoopts);
qcrypto_block_free(crypto);
blk_unref(data.blk);
qapi_free_QCryptoBlockCreateOptions(create_opts);
@@ -382,7 +361,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
}
static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
Error **errp)
PreallocMode prealloc, Error **errp)
{
BlockCrypto *crypto = bs->opaque;
size_t payload_offset =
@@ -390,7 +369,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
offset += payload_offset;
return bdrv_truncate(bs->file, offset, errp);
return bdrv_truncate(bs->file, offset, prealloc, errp);
}
static void block_crypto_close(BlockDriverState *bs)

101
block/crypto.h Normal file
View File

@@ -0,0 +1,101 @@
/*
* QEMU block full disk encryption
*
* Copyright (c) 2015-2017 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef BLOCK_CRYPTO_H__
#define BLOCK_CRYPTO_H__
/*
 * Expand to one braced option-descriptor initializer (.name/.type/.help)
 * for a string-typed "key-secret" option.
 *
 * @prefix:  string literal placed directly in front of the option name;
 *           the two are joined by adjacent string-literal concatenation,
 *           so pass "" for an unprefixed option.
 * @helpstr: text stored in the .help member.
 *
 * The option name always comes from BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, even
 * when this macro is reached through the LUKS wrapper; that is harmless
 * because the QCOW and LUKS key-secret names are the same string. The
 * constant is #defined after this macro, which is fine since it is only
 * looked up when the macro is expanded.
 */
#define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr) \
{ \
.name = prefix BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, \
.type = QEMU_OPT_STRING, \
.help = helpstr, \
}
/* Option name of the secret used for the QCOW (AES) encryption format. */
#define BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET "key-secret"
/*
 * Option descriptor for the QCOW key-secret option.
 * @prefix is a string literal prepended to the option name ("" for none).
 */
#define BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET(prefix) \
BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, \
"ID of the secret that provides the AES encryption key")
/*
 * Unprefixed option names for the LUKS format. The matching
 * BLOCK_CRYPTO_OPT_DEF_LUKS_*() macros below prepend a caller-supplied
 * prefix to these strings to build the option descriptors.
 */
#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
/*
 * Option descriptor for the LUKS key-secret option (string-typed).
 * @prefix is a string literal prepended to the option name ("" for none).
 * Built on the shared key-secret helper; only the help text is
 * LUKS-specific.
 */
#define BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(prefix) \
BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, \
"ID of the secret that provides the keyslot passphrase")
/*
 * Option descriptor for the LUKS "cipher-alg" option (string-typed).
 * @prefix is a string literal prepended to the option name ("" for none).
 */
#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(prefix) \
{ \
.name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG, \
.type = QEMU_OPT_STRING, \
.help = "Name of encryption cipher algorithm", \
}
/*
 * Option descriptor for the LUKS "cipher-mode" option (string-typed).
 * @prefix is a string literal prepended to the option name ("" for none).
 */
#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(prefix) \
{ \
.name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE, \
.type = QEMU_OPT_STRING, \
.help = "Name of encryption cipher mode", \
}
/*
 * Option descriptor for the LUKS "ivgen-alg" option (string-typed).
 * @prefix is a string literal prepended to the option name ("" for none).
 */
#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(prefix) \
{ \
.name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, \
.type = QEMU_OPT_STRING, \
.help = "Name of IV generator algorithm", \
}
/*
 * Option descriptor for the LUKS "ivgen-hash-alg" option (string-typed).
 * @prefix is a string literal prepended to the option name ("" for none).
 */
#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(prefix) \
{ \
.name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG, \
.type = QEMU_OPT_STRING, \
.help = "Name of IV generator hash algorithm", \
}
/*
 * Option descriptor for the LUKS "hash-alg" option (string-typed).
 * @prefix is a string literal prepended to the option name ("" for none).
 */
#define BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(prefix) \
{ \
.name = prefix BLOCK_CRYPTO_OPT_LUKS_HASH_ALG, \
.type = QEMU_OPT_STRING, \
.help = "Name of encryption hash algorithm", \
}
/*
 * Option descriptor for the LUKS "iter-time" option. Unlike the other
 * LUKS options this one is numeric (QEMU_OPT_NUMBER); per its help text
 * the value is a time in milliseconds.
 * @prefix is a string literal prepended to the option name ("" for none).
 */
#define BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(prefix) \
{ \
.name = prefix BLOCK_CRYPTO_OPT_LUKS_ITER_TIME, \
.type = QEMU_OPT_NUMBER, \
.help = "Time to spend in PBKDF in milliseconds", \
}
/*
 * Build a QCryptoBlockCreateOptions for @format from the key/value
 * settings in @opts.
 *
 * @format: encryption format; stored in the result's .format field and
 *          used to pick which format-specific members are parsed.
 * @opts:   dictionary of option values, read through a keyval QObject
 *          input visitor.
 * @errp:   error destination.
 *
 * NOTE(review): the callers in block/crypto.c treat a NULL return as
 * failure and release a successful result with
 * qapi_free_QCryptoBlockCreateOptions() - confirm against the definition.
 */
QCryptoBlockCreateOptions *
block_crypto_create_opts_init(QCryptoBlockFormat format,
QDict *opts,
Error **errp);
/*
 * Build a QCryptoBlockOpenOptions for @format from the key/value
 * settings in @opts.
 *
 * @format: encryption format; stored in the result's .format field and
 *          used to pick which format-specific members are parsed.
 * @opts:   dictionary of option values, read through a keyval QObject
 *          input visitor.
 * @errp:   error destination.
 *
 * NOTE(review): the callers in block/crypto.c treat a NULL return as
 * failure and release a successful result with
 * qapi_free_QCryptoBlockOpenOptions() - confirm against the definition.
 */
QCryptoBlockOpenOptions *
block_crypto_open_opts_init(QCryptoBlockFormat format,
QDict *opts,
Error **errp);
#endif /* BLOCK_CRYPTO_H__ */

View File

@@ -37,13 +37,24 @@
* or enabled. A frozen bitmap can only abdicate() or reclaim().
*/
struct BdrvDirtyBitmap {
QemuMutex *mutex;
HBitmap *bitmap; /* Dirty sector bitmap implementation */
HBitmap *meta; /* Meta dirty bitmap */
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
char *name; /* Optional non-empty unique ID */
int64_t size; /* Size of the bitmap (Number of sectors) */
bool disabled; /* Bitmap is read-only */
bool disabled; /* Bitmap is disabled. It ignores all writes to
the device */
int active_iterators; /* How many iterators are active */
bool readonly; /* Bitmap is read-only. This field also
prevents the respective image from being
modified (i.e. blocks writes and discards).
Such operations must fail and both the image
and this bitmap must remain unchanged while
this flag is set. */
bool autoload; /* For persistent bitmaps: bitmap must be
autoloaded on image opening */
bool persistent; /* bitmap must be saved to owner disk image */
QLIST_ENTRY(BdrvDirtyBitmap) list;
};
@@ -52,6 +63,27 @@ struct BdrvDirtyBitmapIter {
BdrvDirtyBitmap *bitmap;
};
/* Acquire the mutex embedded in @bs as bs->dirty_bitmap_mutex. */
static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs)
{
    qemu_mutex_lock(&bs->dirty_bitmap_mutex);
}
/* Release the mutex embedded in @bs as bs->dirty_bitmap_mutex. */
static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs)
{
    qemu_mutex_unlock(&bs->dirty_bitmap_mutex);
}
/*
 * Acquire the mutex that @bitmap points to through bitmap->mutex.
 * bdrv_create_dirty_bitmap() sets that pointer to the owning node's
 * dirty_bitmap_mutex.
 */
void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap)
{
    qemu_mutex_lock(bitmap->mutex);
}
/* Release the mutex that @bitmap points to through bitmap->mutex. */
void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap)
{
    qemu_mutex_unlock(bitmap->mutex);
}
/* Called with BQL or dirty_bitmap lock taken. */
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
{
BdrvDirtyBitmap *bm;
@@ -65,13 +97,17 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
return NULL;
}
/*
 * Turn @bitmap into an anonymous bitmap: its name is freed and cleared,
 * and the persistent and autoload flags are dropped with it.
 * The bitmap must not be frozen.
 *
 * Called with BQL taken.
 */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));

    bitmap->autoload = false;
    bitmap->persistent = false;

    g_free(bitmap->name);
    bitmap->name = NULL;
}
/* Called with BQL taken. */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
uint32_t granularity,
const char *name,
@@ -96,11 +132,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
return NULL;
}
bitmap = g_new0(BdrvDirtyBitmap, 1);
bitmap->mutex = &bs->dirty_bitmap_mutex;
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
bitmap->size = bitmap_size;
bitmap->name = g_strdup(name);
bitmap->disabled = false;
bdrv_dirty_bitmaps_lock(bs);
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
bdrv_dirty_bitmaps_unlock(bs);
return bitmap;
}
@@ -119,20 +158,24 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int chunk_size)
{
assert(!bitmap->meta);
qemu_mutex_lock(bitmap->mutex);
bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
chunk_size * BITS_PER_BYTE);
qemu_mutex_unlock(bitmap->mutex);
}
/*
 * Drop the meta bitmap attached to @bitmap.
 *
 * A meta bitmap must exist (asserted). It is freed via hbitmap_free_meta()
 * on the underlying HBitmap and bitmap->meta is cleared, both while
 * holding bitmap->mutex.
 */
void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(bitmap->meta);

    qemu_mutex_lock(bitmap->mutex);
    hbitmap_free_meta(bitmap->bitmap);
    bitmap->meta = NULL;
    qemu_mutex_unlock(bitmap->mutex);
}
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors)
int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors)
{
uint64_t i;
int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
@@ -147,11 +190,26 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
return false;
}
/*
 * Locking wrapper around bdrv_dirty_bitmap_get_meta_locked(): takes
 * bitmap->mutex, queries the range [@sector, @sector + @nb_sectors) and
 * returns the result as 0 or 1.
 */
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
                               BdrvDirtyBitmap *bitmap, int64_t sector,
                               int nb_sectors)
{
    bool is_dirty;

    qemu_mutex_lock(bitmap->mutex);
    is_dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector,
                                                 nb_sectors);
    qemu_mutex_unlock(bitmap->mutex);

    return is_dirty;
}
/*
 * Reset @nb_sectors starting at @sector in the meta bitmap of @bitmap,
 * holding bitmap->mutex for the duration of the update.
 */
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
                                  BdrvDirtyBitmap *bitmap, int64_t sector,
                                  int nb_sectors)
{
    qemu_mutex_lock(bitmap->mutex);
    hbitmap_reset(bitmap->meta, sector, nb_sectors);
    qemu_mutex_unlock(bitmap->mutex);
}
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
@@ -164,16 +222,19 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
return bitmap->name;
}
/*
 * A bitmap counts as frozen while it has a successor attached.
 *
 * Called with BQL taken.
 */
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
    return bitmap->successor != NULL;
}
/*
 * A bitmap is enabled when it is neither explicitly disabled nor has a
 * successor attached.
 *
 * Called with BQL taken.
 */
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
    return !bitmap->disabled && !bitmap->successor;
}
/* Called with BQL taken. */
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
{
if (bdrv_dirty_bitmap_frozen(bitmap)) {
@@ -188,6 +249,7 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
/**
* Create a successor bitmap destined to replace this bitmap after an operation.
* Requires that the bitmap is not frozen and has no successor.
* Called with BQL taken.
*/
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, Error **errp)
@@ -220,6 +282,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
/**
* For a bitmap with a successor, yield our name to the successor,
* delete the old bitmap, and return a handle to the new bitmap.
* Called with BQL taken.
*/
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
@@ -238,6 +301,10 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
bitmap->name = NULL;
successor->name = name;
bitmap->successor = NULL;
successor->persistent = bitmap->persistent;
bitmap->persistent = false;
successor->autoload = bitmap->autoload;
bitmap->autoload = false;
bdrv_release_dirty_bitmap(bs, bitmap);
return successor;
@@ -247,6 +314,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
* In cases of failure where we can no longer safely delete the parent,
* we may wish to re-join the parent and child/successor.
* The merged parent will be un-frozen, but not explicitly re-enabled.
* Called with BQL taken.
*/
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *parent,
@@ -271,27 +339,37 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
/**
 * Resize every bitmap attached to @bs to the node's current sector count.
 *
 * The bitmap list is walked under the dirty bitmap lock. No bitmap may be
 * frozen or have active iterators.
 *
 * Called with BQL taken.
 */
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    uint64_t nb_sectors = bdrv_nb_sectors(bs);

    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        assert(!bdrv_dirty_bitmap_frozen(bm));
        assert(!bm->active_iterators);
        hbitmap_truncate(bm->bitmap, nb_sectors);
        bm->size = nb_sectors;
    }
    bdrv_dirty_bitmaps_unlock(bs);
}
static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
bool only_named)
/* Predicate: true when @bitmap has a (non-NULL) name. */
static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap)
{
    return bdrv_dirty_bitmap_name(bitmap) != NULL;
}
/* Called with BQL taken. */
static void bdrv_do_release_matching_dirty_bitmap(
BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
bool (*cond)(BdrvDirtyBitmap *bitmap))
{
BdrvDirtyBitmap *bm, *next;
bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
assert(!bm->active_iterators);
assert(!bdrv_dirty_bitmap_frozen(bm));
assert(!bm->meta);
@@ -301,35 +379,72 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
g_free(bm);
if (bitmap) {
return;
goto out;
}
}
}
if (bitmap) {
abort();
}
out:
bdrv_dirty_bitmaps_unlock(bs);
}
/* Called with BQL taken. */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL);
}
/**
* Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
* There must not be any frozen bitmaps attached.
* This function does not remove persistent bitmaps from the storage.
* Called with BQL taken.
*/
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
{
bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
bdrv_do_release_matching_dirty_bitmap(bs, NULL, bdrv_dirty_bitmap_has_name);
}
/**
 * Release every dirty bitmap attached to @bs whose persistent flag is set
 * (for use in bdrv_inactivate_recurse()).
 *
 * There must not be any frozen bitmaps attached.
 * Only the in-memory bitmaps are released; persistent bitmaps are not
 * removed from the storage.
 */
void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
{
    /* NULL bitmap: consider all bitmaps, filtered by the predicate. */
    bdrv_do_release_matching_dirty_bitmap(bs, NULL,
                                          bdrv_dirty_bitmap_get_persistance);
}
/**
 * Ask the block driver of @bs to remove the stored version of the persistent
 * dirty bitmap called @name, if the driver implements that operation.
 *
 * A missing stored bitmap is not an error: a BdrvDirtyBitmap may have
 * .persistent = true without having been saved yet, and removal must not
 * fail for it.
 *
 * The corresponding BdrvDirtyBitmap is not released by this function.
 * Nothing happens when @bs has no driver or the driver lacks the callback.
 */
void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
                                         const char *name,
                                         Error **errp)
{
    if (!bs->drv || !bs->drv->bdrv_remove_persistent_dirty_bitmap) {
        return;
    }

    bs->drv->bdrv_remove_persistent_dirty_bitmap(bs, name, errp);
}
/*
 * Mark @bitmap as disabled. The bitmap must not be frozen.
 *
 * Called with BQL taken.
 */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
}
/* Called with BQL taken. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -342,6 +457,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
BlockDirtyInfoList *list = NULL;
BlockDirtyInfoList **plist = &list;
bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
@@ -354,12 +470,14 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
*plist = entry;
plist = &entry->next;
}
bdrv_dirty_bitmaps_unlock(bs);
return list;
}
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
int64_t sector)
/* Called within bdrv_dirty_bitmap_lock..unlock */
int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
int64_t sector)
{
if (bitmap) {
return hbitmap_get(bitmap->bitmap, sector);
@@ -388,7 +506,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
return granularity;
}
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
{
return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
@@ -432,23 +550,45 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
return hbitmap_iter_next(&iter->hbi);
}
/*
 * Mark @nr_sectors starting at @cur_sector as dirty in @bitmap.
 * The bitmap must be enabled and must not be read-only.
 *
 * Called within bdrv_dirty_bitmap_lock..unlock
 */
void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
                                  int64_t cur_sector, int64_t nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    assert(!bdrv_dirty_bitmap_readonly(bitmap));

    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
/*
 * Locking wrapper around bdrv_set_dirty_bitmap_locked(): takes the bitmap
 * lock, marks the sector range dirty and drops the lock again.
 */
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int64_t nr_sectors)
{
    bdrv_dirty_bitmap_lock(bitmap);
    bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
    bdrv_dirty_bitmap_unlock(bitmap);
}
/* Called within bdrv_dirty_bitmap_lock..unlock */
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
assert(!bdrv_dirty_bitmap_readonly(bitmap));
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
bdrv_dirty_bitmap_lock(bitmap);
bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
bdrv_dirty_bitmap_unlock(bitmap);
}
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
assert(!bdrv_dirty_bitmap_readonly(bitmap));
bdrv_dirty_bitmap_lock(bitmap);
if (!out) {
hbitmap_reset_all(bitmap->bitmap);
} else {
@@ -457,12 +597,14 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
hbitmap_granularity(backup));
*out = backup;
}
bdrv_dirty_bitmap_unlock(bitmap);
}
/*
 * Install @in as the HBitmap backing @bitmap and free the HBitmap that was
 * installed before. The bitmap must be enabled and must not be read-only.
 */
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
{
    HBitmap *previous = bitmap->bitmap;

    assert(bdrv_dirty_bitmap_enabled(bitmap));
    assert(!bdrv_dirty_bitmap_readonly(bitmap));

    bitmap->bitmap = in;
    hbitmap_free(previous);
}
@@ -499,6 +641,13 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
}
/*
 * Forward to hbitmap_deserialize_ones() on the HBitmap backing @bitmap,
 * passing @start, @count and @finish through unchanged.
 */
void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
                                        uint64_t start, uint64_t count,
                                        bool finish)
{
    hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
}
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
{
hbitmap_deserialize_finish(bitmap->bitmap);
@@ -508,12 +657,20 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int64_t nr_sectors)
{
BdrvDirtyBitmap *bitmap;
if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
return;
}
bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
if (!bdrv_dirty_bitmap_enabled(bitmap)) {
continue;
}
assert(!bdrv_dirty_bitmap_readonly(bitmap));
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
bdrv_dirty_bitmaps_unlock(bs);
}
/**
@@ -533,3 +690,78 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
{
return hbitmap_count(bitmap->meta);
}
/* Return the read-only flag of @bitmap. */
bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap)
{
    return bitmap->readonly;
}
/*
 * Set the read-only flag of @bitmap to @value while holding bitmap->mutex.
 *
 * Called with BQL taken.
 */
void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value)
{
    qemu_mutex_lock(bitmap->mutex);
    bitmap->readonly = value;
    qemu_mutex_unlock(bitmap->mutex);
}
/*
 * Return true if at least one dirty bitmap attached to @bs is read-only.
 * The list is walked without taking the dirty bitmap lock here.
 */
bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *cur;
    bool found = false;

    QLIST_FOREACH(cur, &bs->dirty_bitmaps, list) {
        if (cur->readonly) {
            found = true;
            break;
        }
    }

    return found;
}
/*
 * Set the autoload flag of @bitmap to @autoload while holding bitmap->mutex.
 *
 * Called with BQL taken.
 */
void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
{
    qemu_mutex_lock(bitmap->mutex);
    bitmap->autoload = autoload;
    qemu_mutex_unlock(bitmap->mutex);
}
/* Return the autoload flag of @bitmap. */
bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
{
    return bitmap->autoload;
}
/*
 * Set the persistent flag of @bitmap to @persistent while holding
 * bitmap->mutex.
 *
 * Called with BQL taken.
 */
void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
{
    qemu_mutex_lock(bitmap->mutex);
    bitmap->persistent = persistent;
    qemu_mutex_unlock(bitmap->mutex);
}
/* Return the persistent flag of @bitmap. */
bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap)
{
    return bitmap->persistent;
}
/*
 * Return true if @bs has at least one attached bitmap that is persistent
 * and not read-only. The list is walked without taking the dirty bitmap
 * lock here.
 */
bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *cur;
    bool found = false;

    QLIST_FOREACH(cur, &bs->dirty_bitmaps, list) {
        if (cur->persistent && !cur->readonly) {
            found = true;
            break;
        }
    }

    return found;
}
/*
 * Iterate over the dirty bitmaps of @bs: a NULL @bitmap yields the first
 * entry of the list, otherwise the entry following @bitmap is returned.
 */
BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
                                        BdrvDirtyBitmap *bitmap)
{
    if (bitmap == NULL) {
        return QLIST_FIRST(&bs->dirty_bitmaps);
    }

    return QLIST_NEXT(bitmap, list);
}
/*
 * Forward to hbitmap_sha256() on the HBitmap backing @bitmap and return its
 * result; @errp is passed through.
 */
char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
{
    return hbitmap_sha256(bitmap->bitmap, errp);
}

View File

@@ -381,12 +381,7 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The filename does not have to be prefixed by the protocol name, since
* "file" is the default protocol; therefore, the return value of this
* function call can be ignored. */
strstart(filename, "file:", &filename);
qdict_put_str(options, "filename", filename);
bdrv_parse_filename_strip_prefix(filename, "file:", options);
}
static QemuOptsList raw_runtime_opts = {
@@ -1490,7 +1485,7 @@ static int aio_worker(void *arg)
static int paio_submit_co(BlockDriverState *bs, int fd,
int64_t offset, QEMUIOVector *qiov,
int count, int type)
int bytes, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
@@ -1499,22 +1494,22 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->aio_nbytes = count;
acb->aio_nbytes = bytes;
acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
assert(qiov->size == count);
assert(qiov->size == bytes);
}
trace_paio_submit_co(offset, count, type);
trace_paio_submit_co(offset, bytes, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_co(pool, aio_worker, acb);
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t offset, QEMUIOVector *qiov, int count,
int64_t offset, QEMUIOVector *qiov, int bytes,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
@@ -1524,7 +1519,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->aio_nbytes = count;
acb->aio_nbytes = bytes;
acb->aio_offset = offset;
if (qiov) {
@@ -1533,7 +1528,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
assert(qiov->size == acb->aio_nbytes);
}
trace_paio_submit(acb, opaque, offset, count, type);
trace_paio_submit(acb, opaque, offset, bytes, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
@@ -1629,7 +1624,122 @@ static void raw_close(BlockDriverState *bs)
}
}
static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
/**
 * Truncates the given regular file @fd to @offset and, when growing, fills the
 * new space according to @prealloc.
 *
 * @fd:       descriptor of the file to resize
 * @offset:   requested new length in bytes
 * @prealloc: how newly added space is allocated; any mode other than
 *            PREALLOC_MODE_OFF is rejected with -ENOTSUP when shrinking
 * @errp:     receives a description of the failure on error
 *
 * If preallocation (FALLOC or FULL) fails, an attempt is made to truncate the
 * file back to its previous length; failure of that attempt is only reported.
 *
 * Returns: 0 on success, -errno on failure.
 */
static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
                                Error **errp)
{
    int result = 0;
    int64_t current_length = 0;
    char *buf = NULL;
    struct stat st;

    if (fstat(fd, &st) < 0) {
        result = -errno;
        error_setg_errno(errp, -result, "Could not stat file");
        return result;
    }

    current_length = st.st_size;
    if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
        error_setg(errp, "Cannot use preallocation for shrinking files");
        return -ENOTSUP;
    }

    switch (prealloc) {
#ifdef CONFIG_POSIX_FALLOCATE
    case PREALLOC_MODE_FALLOC:
        /*
         * Truncating before posix_fallocate() makes it about twice slower on
         * file systems that do not support fallocate(), trying to check if a
         * block is allocated before allocating it, so don't do that here.
         */
        if (offset != current_length) {
            result = -posix_fallocate(fd, current_length,
                                      offset - current_length);
            if (result != 0) {
                /* posix_fallocate() doesn't set errno. */
                error_setg_errno(errp, -result,
                                 "Could not preallocate new data");
            }
        } else {
            /*
             * Nothing to allocate; posix_fallocate() would fail with EINVAL
             * for a zero length, so treat this as a successful no-op.
             */
            result = 0;
        }
        goto out;
#endif
    case PREALLOC_MODE_FULL:
    {
        int64_t num = 0, left = offset - current_length;
        off_t seek_result;

        /*
         * Knowing the final size from the beginning could allow the file
         * system driver to do less allocations and possibly avoid
         * fragmentation of the file.
         */
        if (ftruncate(fd, offset) != 0) {
            result = -errno;
            error_setg_errno(errp, -result, "Could not resize file");
            goto out;
        }

        buf = g_malloc0(65536);

        /*
         * Keep the lseek() result in an off_t: squeezing it into an int
         * overflows for files of 2 GiB or more and can make a successful
         * seek look like a failure.
         */
        seek_result = lseek(fd, current_length, SEEK_SET);
        if (seek_result < 0) {
            result = -errno;
            error_setg_errno(errp, -result,
                             "Failed to seek to the old end of file");
            goto out;
        }

        /* Fill the newly added area with zeroes, one buffer at a time. */
        while (left > 0) {
            num = MIN(left, 65536);
            result = write(fd, buf, num);
            if (result < 0) {
                result = -errno;
                error_setg_errno(errp, -result,
                                 "Could not write zeros for preallocation");
                goto out;
            }
            left -= result;
        }
        if (result >= 0) {
            result = fsync(fd);
            if (result < 0) {
                result = -errno;
                error_setg_errno(errp, -result,
                                 "Could not flush file to disk");
                goto out;
            }
        }
        goto out;
    }
    case PREALLOC_MODE_OFF:
        if (ftruncate(fd, offset) != 0) {
            result = -errno;
            error_setg_errno(errp, -result, "Could not resize file");
        }
        return result;
    default:
        result = -ENOTSUP;
        error_setg(errp, "Unsupported preallocation mode: %s",
                   PreallocMode_lookup[prealloc]);
        return result;
    }

out:
    if (result < 0) {
        /* Best effort: undo a partial grow so the old length is kept. */
        if (ftruncate(fd, current_length) < 0) {
            error_report("Failed to restore old file length: %s",
                         strerror(errno));
        }
    }

    g_free(buf);
    return result;
}
static int raw_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
BDRVRawState *s = bs->opaque;
struct stat st;
@@ -1642,12 +1752,16 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
}
if (S_ISREG(st.st_mode)) {
if (ftruncate(s->fd, offset) < 0) {
ret = -errno;
error_setg_errno(errp, -ret, "Failed to resize the file");
return ret;
}
} else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
return raw_regular_truncate(s->fd, offset, prealloc, errp);
}
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Preallocation mode '%s' unsupported for this "
"non-regular file", PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
if (offset > raw_getlength(bs)) {
error_setg(errp, "Cannot grow device files");
return -EINVAL;
@@ -1890,71 +2004,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
#endif
}
switch (prealloc) {
#ifdef CONFIG_POSIX_FALLOCATE
case PREALLOC_MODE_FALLOC:
/*
* Truncating before posix_fallocate() makes it about twice slower on
* file systems that do not support fallocate(), trying to check if a
* block is allocated before allocating it, so don't do that here.
*/
result = -posix_fallocate(fd, 0, total_size);
if (result != 0) {
/* posix_fallocate() doesn't set errno. */
error_setg_errno(errp, -result,
"Could not preallocate data for the new file");
}
break;
#endif
case PREALLOC_MODE_FULL:
{
/*
* Knowing the final size from the beginning could allow the file
* system driver to do less allocations and possibly avoid
* fragmentation of the file.
*/
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
goto out_close;
}
int64_t num = 0, left = total_size;
buf = g_malloc0(65536);
while (left > 0) {
num = MIN(left, 65536);
result = write(fd, buf, num);
if (result < 0) {
result = -errno;
error_setg_errno(errp, -result,
"Could not write to the new file");
break;
}
left -= result;
}
if (result >= 0) {
result = fsync(fd);
if (result < 0) {
result = -errno;
error_setg_errno(errp, -result,
"Could not flush new file to disk");
}
}
g_free(buf);
break;
}
case PREALLOC_MODE_OFF:
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
break;
default:
result = -EINVAL;
error_setg(errp, "Unsupported preallocation mode: %s",
PreallocMode_lookup[prealloc]);
break;
result = raw_regular_truncate(fd, total_size, prealloc, errp);
if (result < 0) {
goto out_close;
}
out_close:
@@ -2114,26 +2166,26 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
}
static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
int64_t offset, int count,
int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
return paio_submit(bs, s->fd, offset, NULL, count,
return paio_submit(bs, s->fd, offset, NULL, bytes,
cb, opaque, QEMU_AIO_DISCARD);
}
static int coroutine_fn raw_co_pwrite_zeroes(
BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags)
int bytes, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, offset, NULL, count,
return paio_submit_co(bs, s->fd, offset, NULL, bytes,
QEMU_AIO_WRITE_ZEROES);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, offset, NULL, count,
return paio_submit_co(bs, s->fd, offset, NULL, bytes,
QEMU_AIO_DISCARD);
}
return -ENOTSUP;
@@ -2395,10 +2447,7 @@ static int check_hdev_writable(BDRVRawState *s)
static void hdev_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_device:", &filename);
qdict_put_str(options, "filename", filename);
bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
}
static bool hdev_is_sg(BlockDriverState *bs)
@@ -2568,7 +2617,7 @@ static int fd_open(BlockDriverState *bs)
}
static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
int64_t offset, int count,
int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
@@ -2576,12 +2625,12 @@ static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
if (fd_open(bs) < 0) {
return NULL;
}
return paio_submit(bs, s->fd, offset, NULL, count,
return paio_submit(bs, s->fd, offset, NULL, bytes,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
int64_t offset, int bytes, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
int rc;
@@ -2591,10 +2640,10 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
return rc;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, offset, NULL, count,
return paio_submit_co(bs, s->fd, offset, NULL, bytes,
QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, offset, NULL, count,
return paio_submit_co(bs, s->fd, offset, NULL, bytes,
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
return -ENOTSUP;
@@ -2697,10 +2746,7 @@ static BlockDriver bdrv_host_device = {
static void cdrom_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_cdrom:", &filename);
qdict_put_str(options, "filename", filename);
bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options);
}
#endif

View File

@@ -276,12 +276,7 @@ static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The filename does not have to be prefixed by the protocol name, since
* "file" is the default protocol; therefore, the return value of this
* function call can be ignored. */
strstart(filename, "file:", &filename);
qdict_put_str(options, "filename", filename);
bdrv_parse_filename_strip_prefix(filename, "file:", options);
}
static QemuOptsList raw_runtime_opts = {
@@ -466,12 +461,19 @@ static void raw_close(BlockDriverState *bs)
}
}
static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
static int raw_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
BDRVRawState *s = bs->opaque;
LONG low, high;
DWORD dwPtrLow;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
low = offset;
high = offset >> 32;
@@ -671,10 +673,7 @@ static int hdev_probe_device(const char *filename)
static void hdev_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_device:", &filename);
qdict_put_str(options, "filename", filename);
bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
}
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,

View File

@@ -345,8 +345,7 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
is_unix = true;
} else if (!strcmp(uri->scheme, "gluster+rdma")) {
gsconf->type = SOCKET_ADDRESS_TYPE_INET;
error_report("Warning: rdma feature is not supported, falling "
"back to tcp");
warn_report("rdma feature is not supported, falling back to tcp");
} else {
ret = -EINVAL;
goto out;
@@ -493,8 +492,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
Error *local_err = NULL;
char *str = NULL;
const char *ptr;
size_t num_servers;
int i, type;
int i, type, num_servers;
/* create opts info from runtime_json_opts list */
opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
@@ -964,29 +962,6 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
qemu_coroutine_yield();
return acb.ret;
}
static inline bool gluster_supports_zerofill(void)
{
return 1;
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
{
return glfs_zerofill(fd, offset, size);
}
#else
static inline bool gluster_supports_zerofill(void)
{
return 0;
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
{
return 0;
}
#endif
static int qemu_gluster_create(const char *filename,
@@ -996,9 +971,10 @@ static int qemu_gluster_create(const char *filename,
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
PreallocMode prealloc;
int64_t total_size = 0;
char *tmp = NULL;
Error *local_err = NULL;
gconf = g_new0(BlockdevOptionsGluster, 1);
gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
@@ -1026,13 +1002,12 @@ static int qemu_gluster_create(const char *filename,
BDRV_SECTOR_SIZE);
tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
if (!tmp || !strcmp(tmp, "off")) {
prealloc = 0;
} else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
prealloc = 1;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'"
" or GlusterFS doesn't support zerofill API", tmp);
prealloc = qapi_enum_parse(PreallocMode_lookup, tmp,
PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
g_free(tmp);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
}
@@ -1041,21 +1016,48 @@ static int qemu_gluster_create(const char *filename,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
if (!fd) {
ret = -errno;
} else {
goto out;
}
switch (prealloc) {
#ifdef CONFIG_GLUSTERFS_FALLOCATE
case PREALLOC_MODE_FALLOC:
if (glfs_fallocate(fd, 0, 0, total_size)) {
error_setg(errp, "Could not preallocate data for the new file");
ret = -errno;
}
break;
#endif /* CONFIG_GLUSTERFS_FALLOCATE */
#ifdef CONFIG_GLUSTERFS_ZEROFILL
case PREALLOC_MODE_FULL:
if (!glfs_ftruncate(fd, total_size)) {
if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
if (glfs_zerofill(fd, 0, total_size)) {
error_setg(errp, "Could not zerofill the new file");
ret = -errno;
}
} else {
error_setg(errp, "Could not resize file");
ret = -errno;
}
break;
#endif /* CONFIG_GLUSTERFS_ZEROFILL */
case PREALLOC_MODE_OFF:
if (glfs_ftruncate(fd, total_size) != 0) {
ret = -errno;
error_setg(errp, "Could not resize file");
}
break;
default:
ret = -EINVAL;
error_setg(errp, "Unsupported preallocation mode: %s",
PreallocMode_lookup[prealloc]);
break;
}
if (glfs_close(fd) != 0) {
ret = -errno;
}
if (glfs_close(fd) != 0) {
ret = -errno;
}
out:
g_free(tmp);
qapi_free_BlockdevOptionsGluster(gconf);
glfs_clear_preopened(glfs);
return ret;
@@ -1093,11 +1095,17 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
}
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
Error **errp)
PreallocMode prealloc, Error **errp)
{
int ret;
BDRVGlusterState *s = bs->opaque;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
ret = glfs_ftruncate(s->fd, offset);
if (ret < 0) {
ret = -errno;
@@ -1275,7 +1283,14 @@ static int find_allocation(BlockDriverState *bs, off_t start,
if (offs < 0) {
return -errno; /* D3 or D4 */
}
assert(offs >= start);
if (offs < start) {
/* This is not a valid return by lseek(). We are safe to just return
* -EIO in this case, and we'll treat it like D4. Unfortunately some
* versions of gluster server will return offs < start, so an assert
* here will unnecessarily abort QEMU. */
return -EIO;
}
if (offs > start) {
/* D2: in hole, next data at offs */
@@ -1307,7 +1322,14 @@ static int find_allocation(BlockDriverState *bs, off_t start,
if (offs < 0) {
return -errno; /* D1 and (H3 or H4) */
}
assert(offs >= start);
if (offs < start) {
/* This is not a valid return by lseek(). We are safe to just return
* -EIO in this case, and we'll treat it like H4. Unfortunately some
* versions of gluster server will return offs < start, so an assert
* here will unnecessarily abort QEMU. */
return -EIO;
}
if (offs > start) {
/*

View File

@@ -26,6 +26,7 @@
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
@@ -33,16 +34,8 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
int64_t offset,
QEMUIOVector *qiov,
BdrvRequestFlags flags,
BlockCompletionFunc *cb,
void *opaque,
bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags);
int64_t offset, int bytes, BdrvRequestFlags flags);
void bdrv_parent_drained_begin(BlockDriverState *bs)
{
@@ -129,13 +122,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
*/
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
bs->copy_on_read++;
atomic_inc(&bs->copy_on_read);
}
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
assert(bs->copy_on_read > 0);
bs->copy_on_read--;
int old = atomic_fetch_dec(&bs->copy_on_read);
assert(old >= 1);
}
/* Check if any requests are in-flight (including throttled requests) */
@@ -156,6 +149,37 @@ bool bdrv_requests_pending(BlockDriverState *bs)
return false;
}
/* State handed to the drain helper coroutines / callbacks in this file. */
typedef struct {
    Coroutine *co;          /* coroutine associated with the request */
    BlockDriverState *bs;   /* node being drained */
    bool done;              /* set to true once the work has completed */
} BdrvCoDrainData;
/*
 * Coroutine entry point used by bdrv_drain_invoke(): runs the driver's
 * bdrv_co_drain callback on the node stored in @opaque (a BdrvCoDrainData),
 * flags completion and wakes up whoever is waiting on the node.
 */
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
{
    BdrvCoDrainData *drain = opaque;
    BlockDriverState *node = drain->bs;

    node->drv->bdrv_co_drain(node);

    /* Set drain->done before reading bs->wakeup. */
    atomic_mb_set(&drain->done, true);
    bdrv_wakeup(node);
}
/*
 * Invoke the driver's bdrv_co_drain callback for @bs in a new coroutine and
 * poll until that coroutine has signalled completion. Does nothing when @bs
 * has no driver or the driver does not implement bdrv_co_drain.
 */
static void bdrv_drain_invoke(BlockDriverState *bs)
{
    BdrvCoDrainData data = { .bs = bs, .done = false };

    if (!bs->drv) {
        return;
    }
    if (!bs->drv->bdrv_co_drain) {
        return;
    }

    data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
    bdrv_coroutine_enter(bs, data.co);

    /* data lives on this stack frame, so wait for the coroutine here. */
    BDRV_POLL_WHILE(bs, !data.done);
}
static bool bdrv_drain_recurse(BlockDriverState *bs)
{
BdrvChild *child, *tmp;
@@ -163,9 +187,8 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
if (bs->drv && bs->drv->bdrv_drain) {
bs->drv->bdrv_drain(bs);
}
/* Ensure any pending metadata writes are submitted to bs->file. */
bdrv_drain_invoke(bs);
QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
BlockDriverState *bs = child->bs;
@@ -191,12 +214,6 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
return waited;
}
typedef struct {
Coroutine *co;
BlockDriverState *bs;
bool done;
} BdrvCoDrainData;
static void bdrv_co_drain_bh_cb(void *opaque)
{
BdrvCoDrainData *data = opaque;
@@ -240,7 +257,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
return;
}
if (!bs->quiesce_counter++) {
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
bdrv_parent_drained_begin(bs);
}
@@ -251,7 +268,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
void bdrv_drained_end(BlockDriverState *bs)
{
assert(bs->quiesce_counter > 0);
if (--bs->quiesce_counter > 0) {
if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
return;
}
@@ -301,16 +318,9 @@ void bdrv_drain_all_begin(void)
bool waited = true;
BlockDriverState *bs;
BdrvNextIterator it;
BlockJob *job = NULL;
GSList *aio_ctxs = NULL, *ctx;
while ((job = block_job_next(job))) {
AioContext *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
block_job_pause(job);
aio_context_release(aio_context);
}
block_job_pause_all();
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
@@ -354,7 +364,6 @@ void bdrv_drain_all_end(void)
{
BlockDriverState *bs;
BdrvNextIterator it;
BlockJob *job = NULL;
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
@@ -365,13 +374,7 @@ void bdrv_drain_all_end(void)
aio_context_release(aio_context);
}
while ((job = block_job_next(job))) {
AioContext *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
block_job_resume(job);
aio_context_release(aio_context);
}
block_job_resume_all();
}
void bdrv_drain_all(void)
@@ -388,11 +391,13 @@ void bdrv_drain_all(void)
static void tracked_request_end(BdrvTrackedRequest *req)
{
if (req->serialising) {
req->bs->serialising_in_flight--;
atomic_dec(&req->bs->serialising_in_flight);
}
qemu_co_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
qemu_co_mutex_unlock(&req->bs->reqs_lock);
}
/**
@@ -417,7 +422,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
qemu_co_queue_init(&req->wait_queue);
qemu_co_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
qemu_co_mutex_unlock(&bs->reqs_lock);
}
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
@@ -427,7 +434,7 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
- overlap_offset;
if (!req->serialising) {
req->bs->serialising_in_flight++;
atomic_inc(&req->bs->serialising_in_flight);
req->serialising = true;
}
@@ -435,27 +442,6 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}
/**
 * Widen a sector range so that it begins and ends on cluster boundaries.
 *
 * The cluster size comes from bdrv_get_info().  When that call fails or
 * reports a cluster size of zero, the range is passed through unchanged.
 *
 * @sector_num / @nb_sectors: the range to round
 * @cluster_sector_num: receives the rounded-down first sector
 * @cluster_nb_sectors: receives the rounded-up sector count
 */
void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
                                    int64_t sector_num, int nb_sectors,
                                    int64_t *cluster_sector_num,
                                    int *cluster_nb_sectors)
{
    BlockDriverInfo info;
    int64_t sectors_per_cluster;
    int64_t aligned_start;

    if (bdrv_get_info(bs, &info) < 0 || info.cluster_size == 0) {
        /* No usable cluster size: leave the range as requested. */
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
        return;
    }

    sectors_per_cluster = info.cluster_size / BDRV_SECTOR_SIZE;
    aligned_start = QEMU_ALIGN_DOWN(sector_num, sectors_per_cluster);

    *cluster_sector_num = aligned_start;
    /* The length grows by however far the start moved back, then rounds up. */
    *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - aligned_start +
                                        nb_sectors, sectors_per_cluster);
}
/**
* Round a region to cluster boundaries
*/
@@ -514,7 +500,8 @@ static void dummy_bh_cb(void *opaque)
void bdrv_wakeup(BlockDriverState *bs)
{
if (bs->wakeup) {
/* The barrier (or an atomic op) is in the caller. */
if (atomic_read(&bs->wakeup)) {
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
}
}
@@ -532,12 +519,13 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
bool retry;
bool waited = false;
if (!bs->serialising_in_flight) {
if (!atomic_read(&bs->serialising_in_flight)) {
return false;
}
do {
retry = false;
qemu_co_mutex_lock(&bs->reqs_lock);
QLIST_FOREACH(req, &bs->tracked_requests, list) {
if (req == self || (!req->serialising && !self->serialising)) {
continue;
@@ -556,7 +544,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
qemu_co_queue_wait(&req->wait_queue, NULL);
qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
self->waiting_for = NULL;
retry = true;
waited = true;
@@ -564,6 +552,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
}
}
}
qemu_co_mutex_unlock(&bs->reqs_lock);
} while (retry);
return waited;
@@ -680,12 +669,12 @@ int bdrv_write(BdrvChild *child, int64_t sector_num,
}
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int count, BdrvRequestFlags flags)
int bytes, BdrvRequestFlags flags)
{
QEMUIOVector qiov;
struct iovec iov = {
.iov_base = NULL,
.iov_len = count,
.iov_len = bytes,
};
qemu_iovec_init_external(&qiov, &iov, 1);
@@ -1068,17 +1057,18 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
}
if (flags & BDRV_REQ_COPY_ON_READ) {
int64_t start_sector = offset >> BDRV_SECTOR_BITS;
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
unsigned int nb_sectors = end_sector - start_sector;
int pnum;
/* TODO: Simplify further once bdrv_is_allocated no longer
* requires sector alignment */
int64_t start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
int64_t end = QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE);
int64_t pnum;
ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
ret = bdrv_is_allocated(bs, start, end - start, &pnum);
if (ret < 0) {
goto out;
}
if (!ret || pnum != nb_sectors) {
if (!ret || pnum != end - start) {
ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
@@ -1157,7 +1147,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
bdrv_inc_in_flight(bs);
/* Don't do copy-on-read if we read data before write operation */
if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
flags |= BDRV_REQ_COPY_ON_READ;
}
@@ -1226,7 +1216,7 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
int64_t offset, int bytes, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
@@ -1244,12 +1234,12 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
assert(alignment % bs->bl.request_alignment == 0);
head = offset % alignment;
tail = (offset + count) % alignment;
tail = (offset + bytes) % alignment;
max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
assert(max_write_zeroes >= bs->bl.request_alignment);
while (count > 0 && !ret) {
int num = count;
while (bytes > 0 && !ret) {
int num = bytes;
/* Align request. Block drivers can expect the "bulk" of the request
* to be aligned, and that unaligned requests do not cross cluster
@@ -1259,7 +1249,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
/* Make a small request up to the first aligned sector. For
* convenience, limit this request to max_transfer even if
* we don't need to fall back to writes. */
num = MIN(MIN(count, max_transfer), alignment - head);
num = MIN(MIN(bytes, max_transfer), alignment - head);
head = (head + num) % alignment;
assert(num < max_write_zeroes);
} else if (tail && num > alignment) {
@@ -1320,7 +1310,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
}
offset += num;
count -= num;
bytes -= num;
}
fail:
@@ -1349,6 +1339,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
uint64_t bytes_remaining = bytes;
int max_transfer;
if (bdrv_has_readonly_bitmaps(bs)) {
return -EPERM;
}
assert(is_power_of_2(align));
assert((offset & (align - 1)) == 0);
assert((bytes & (align - 1)) == 0);
@@ -1414,12 +1408,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
++bs->write_gen;
atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
bs->wr_highest_offset = offset + bytes;
}
stat64_max(&bs->wr_highest_offset, offset + bytes);
if (ret >= 0) {
bs->total_sectors = MAX(bs->total_sectors, end_sector);
@@ -1674,15 +1666,15 @@ int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
}
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int count, BdrvRequestFlags flags)
int bytes, BdrvRequestFlags flags)
{
trace_bdrv_co_pwrite_zeroes(child->bs, offset, count, flags);
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
flags &= ~BDRV_REQ_MAY_UNMAP;
}
return bdrv_co_pwritev(child, offset, count, NULL,
return bdrv_co_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
@@ -1727,15 +1719,16 @@ typedef struct BdrvCoGetBlockStatusData {
* Drivers not implementing the functionality are assumed to not support
* backing files, hence all their sectors are reported as allocated.
*
* If 'sector_num' is beyond the end of the disk image the return value is 0
* and 'pnum' is set to 0.
* If 'sector_num' is beyond the end of the disk image the return value is
* BDRV_BLOCK_EOF and 'pnum' is set to 0.
*
* 'pnum' is set to the number of sectors (including and immediately following
* the specified sector) that are known to be in the same
* allocated/unallocated state.
*
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
* beyond the end of the disk image it will be clamped; if 'pnum' is set to
* the end of the image, then the returned value will include BDRV_BLOCK_EOF.
*
* If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
* points to the BDS which the sector range is allocated in.
@@ -1749,6 +1742,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
int64_t n;
int64_t ret, ret2;
*file = NULL;
total_sectors = bdrv_nb_sectors(bs);
if (total_sectors < 0) {
return total_sectors;
@@ -1756,7 +1750,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
if (sector_num >= total_sectors) {
*pnum = 0;
return 0;
return BDRV_BLOCK_EOF;
}
n = total_sectors - sector_num;
@@ -1767,13 +1761,16 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
if (!bs->drv->bdrv_co_get_block_status) {
*pnum = nb_sectors;
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
if (sector_num + nb_sectors == total_sectors) {
ret |= BDRV_BLOCK_EOF;
}
if (bs->drv->protocol_name) {
ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
*file = bs;
}
return ret;
}
*file = NULL;
bdrv_inc_in_flight(bs);
ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
file);
@@ -1783,7 +1780,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
}
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID);
assert(ret & BDRV_BLOCK_OFFSET_VALID && *file);
ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
*pnum, pnum, file);
goto out;
@@ -1815,10 +1812,13 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
/* Ignore errors. This is just providing extra information, it
* is useful but not necessary.
*/
if (!file_pnum) {
/* !file_pnum indicates an offset at or beyond the EOF; it is
* perfectly valid for the format block driver to point to such
* offsets, so catch it and mark everything as zero */
if (ret2 & BDRV_BLOCK_EOF &&
(!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
/*
* It is valid for the format block driver to read
* beyond the end of the underlying file's current
* size; such areas read as zero.
*/
ret |= BDRV_BLOCK_ZERO;
} else {
/* Limit request to the range reported by the protocol driver */
@@ -1830,6 +1830,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
out:
bdrv_dec_in_flight(bs);
if (ret >= 0 && sector_num + *pnum == total_sectors) {
ret |= BDRV_BLOCK_EOF;
}
return ret;
}
@@ -1842,16 +1845,30 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
{
BlockDriverState *p;
int64_t ret = 0;
bool first = true;
assert(bs != base);
for (p = bs; p != base; p = backing_bs(p)) {
ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
if (ret < 0) {
break;
}
if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
/*
* Reading beyond the end of the file continues to read
* zeroes, but we can only widen the result to the
* unallocated length we learned from an earlier
* iteration.
*/
*pnum = nb_sectors;
}
if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
break;
}
/* [sector_num, pnum] unallocated on this layer, which could be only
* the first part of [sector_num, nb_sectors]. */
nb_sectors = MIN(nb_sectors, *pnum);
first = false;
}
return ret;
}
@@ -1912,59 +1929,72 @@ int64_t bdrv_get_block_status(BlockDriverState *bs,
sector_num, nb_sectors, pnum, file);
}
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum)
{
BlockDriverState *file;
int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
&file);
int64_t sector_num = offset >> BDRV_SECTOR_BITS;
int nb_sectors = bytes >> BDRV_SECTOR_BITS;
int64_t ret;
int psectors;
assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) && bytes < INT_MAX);
ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &psectors,
&file);
if (ret < 0) {
return ret;
}
if (pnum) {
*pnum = psectors * BDRV_SECTOR_SIZE;
}
return !!(ret & BDRV_BLOCK_ALLOCATED);
}
/*
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
* Return true if the given sector is allocated in any image between
* BASE and TOP (inclusive). BASE can be NULL to check if the given
* sector is allocated in any image of the chain. Return false otherwise.
* Return true if (a prefix of) the given range is allocated in any image
* between BASE and TOP (inclusive). BASE can be NULL to check if the given
* offset is allocated in any image of the chain. Return false otherwise,
* or negative errno on failure.
*
* 'pnum' is set to the number of sectors (including and immediately following
* the specified sector) that are known to be in the same
* allocated/unallocated state.
* 'pnum' is set to the number of bytes (including and immediately
* following the specified offset) that are known to be in the same
* allocated/unallocated state. Note that a subsequent call starting
* at 'offset + *pnum' may return the same allocation status (in other
* words, the result is not necessarily the maximum possible range);
* but 'pnum' will only be 0 when end of file is reached.
*
*/
int bdrv_is_allocated_above(BlockDriverState *top,
BlockDriverState *base,
int64_t sector_num,
int nb_sectors, int *pnum)
int64_t offset, int64_t bytes, int64_t *pnum)
{
BlockDriverState *intermediate;
int ret, n = nb_sectors;
int ret;
int64_t n = bytes;
intermediate = top;
while (intermediate && intermediate != base) {
int pnum_inter;
ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
&pnum_inter);
int64_t pnum_inter;
int64_t size_inter;
ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
if (ret < 0) {
return ret;
} else if (ret) {
}
if (ret) {
*pnum = pnum_inter;
return 1;
}
/*
* [sector_num, nb_sectors] is unallocated on top but intermediate
* might have
*
* [sector_num+x, nr_sectors] allocated.
*/
size_inter = bdrv_getlength(intermediate);
if (size_inter < 0) {
return size_inter;
}
if (n > pnum_inter &&
(intermediate == top ||
sector_num + pnum_inter < intermediate->total_sectors)) {
(intermediate == top || offset + pnum_inter < size_inter)) {
n = pnum_inter;
}
@@ -1988,17 +2018,24 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
{
BlockDriver *drv = bs->drv;
int ret = -ENOTSUP;
bdrv_inc_in_flight(bs);
if (!drv) {
return -ENOMEDIUM;
ret = -ENOMEDIUM;
} else if (drv->bdrv_load_vmstate) {
return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos)
: drv->bdrv_save_vmstate(bs, qiov, pos);
if (is_read) {
ret = drv->bdrv_load_vmstate(bs, qiov, pos);
} else {
ret = drv->bdrv_save_vmstate(bs, qiov, pos);
}
} else if (bs->file) {
return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
}
return -ENOTSUP;
bdrv_dec_in_flight(bs);
return ret;
}
static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
@@ -2024,9 +2061,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
bdrv_coroutine_enter(bs, co);
while (data.ret == -EINPROGRESS) {
aio_poll(bdrv_get_aio_context(bs), true);
}
BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
return data.ret;
}
}
@@ -2083,28 +2118,6 @@ int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
/**************************************************************/
/* async I/Os */
/*
 * Sector-based asynchronous vectored read.
 *
 * Converts @sector_num to a byte offset and starts the request through
 * bdrv_co_aio_prw_vector() with no request flags.  @nb_sectors must describe
 * exactly the same length as @qiov (asserted).
 *
 * Returns the AIOCB produced by bdrv_co_aio_prw_vector().
 */
BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num,
                           QEMUIOVector *qiov, int nb_sectors,
                           BlockCompletionFunc *cb, void *opaque)
{
    int64_t byte_offset;

    trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
    assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);

    byte_offset = sector_num << BDRV_SECTOR_BITS;
    return bdrv_co_aio_prw_vector(child, byte_offset, qiov, 0, cb, opaque,
                                  false);
}
/*
 * Sector-based asynchronous vectored write.
 *
 * Converts @sector_num to a byte offset and starts the request through
 * bdrv_co_aio_prw_vector() with no request flags.  @nb_sectors must describe
 * exactly the same length as @qiov (asserted).
 *
 * Returns the AIOCB produced by bdrv_co_aio_prw_vector().
 */
BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
                            QEMUIOVector *qiov, int nb_sectors,
                            BlockCompletionFunc *cb, void *opaque)
{
    int64_t byte_offset;

    trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
    assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);

    byte_offset = sector_num << BDRV_SECTOR_BITS;
    return bdrv_co_aio_prw_vector(child, byte_offset, qiov, 0, cb, opaque,
                                  true);
}
void bdrv_aio_cancel(BlockAIOCB *acb)
{
qemu_aio_ref(acb);
@@ -2136,147 +2149,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
}
}
/**************************************************************/
/* async block device emulation */
/*
 * Parameters and status of one request issued through the coroutine-based
 * AIO emulation (embedded in BlockAIOCBCoroutine as 'req').
 */
typedef struct BlockRequest {
union {
/* Used during read, write, trim */
struct {
/* Starting offset passed to bdrv_co_preadv()/bdrv_co_pwritev() */
int64_t offset;
int bytes;
/* Request flags passed to bdrv_co_preadv()/bdrv_co_pwritev() */
int flags;
/* I/O vector; bdrv_co_do_rw() takes the request length from qiov->size */
QEMUIOVector *qiov;
};
/* Used during ioctl */
struct {
int req;
void *buf;
};
};
BlockCompletionFunc *cb;
void *opaque;
/* -EINPROGRESS while the request is pending, afterwards the return value
 * of the operation; handed to the completion callback */
int error;
} BlockRequest;
/*
 * AIOCB for requests that are carried out by a coroutine
 * (see bdrv_co_aio_prw_vector() and bdrv_aio_flush()).
 */
typedef struct BlockAIOCBCoroutine {
/* Generic AIOCB part; provides bs, cb and opaque used on completion */
BlockAIOCB common;
/* Child the read/write is issued on (set by bdrv_co_aio_prw_vector()) */
BdrvChild *child;
/* Request parameters and status */
BlockRequest req;
/* Selects bdrv_co_pwritev() (true) or bdrv_co_preadv() (false) in
 * bdrv_co_do_rw() */
bool is_write;
/* While true, bdrv_co_complete() does nothing; cleared by
 * bdrv_co_maybe_schedule_bh() once the submitter has finished setup */
bool need_bh;
bool *done;
} BlockAIOCBCoroutine;
/*
 * AIOCB descriptor passed to qemu_aio_get() by the coroutine-based AIO
 * emulation; the size covers a full BlockAIOCBCoroutine.
 */
static const AIOCBInfo bdrv_em_co_aiocb_info = {
.aiocb_size = sizeof(BlockAIOCBCoroutine),
};
/*
 * Finish an emulated AIO request: drop the in-flight reference on the node,
 * invoke the completion callback with the request's status, and release the
 * AIOCB.
 *
 * Does nothing while acb->need_bh is still set; in that case completion is
 * left to whoever clears the flag.
 */
static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
{
    if (acb->need_bh) {
        return;
    }

    bdrv_dec_in_flight(acb->common.bs);
    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_aio_unref(acb);
}
/*
 * Bottom-half callback scheduled by bdrv_co_maybe_schedule_bh(): completes
 * the request passed in @opaque.  The need_bh flag must already have been
 * cleared by the time this runs.
 */
static void bdrv_co_em_bh(void *opaque)
{
    BlockAIOCBCoroutine *request = opaque;

    assert(!request->need_bh);
    bdrv_co_complete(request);
}
/*
 * Called by the submitter after entering the request coroutine.
 *
 * Clears need_bh so that bdrv_co_complete() becomes effective.  If the
 * request status is no longer -EINPROGRESS, completion is delivered from a
 * one-shot bottom half in the node's AioContext.
 */
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
{
    acb->need_bh = false;

    if (acb->req.error == -EINPROGRESS) {
        /* Still running: the coroutine will complete the request itself. */
        return;
    }

    aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
                            bdrv_co_em_bh, acb);
}
/*
 * Coroutine entry point for emulated AIO reads and writes.
 *
 * Issues bdrv_co_pwritev() or bdrv_co_preadv() depending on acb->is_write,
 * using the offset, I/O vector and flags stored in the request (the length
 * is the I/O vector's size), records the result in the request, and then
 * attempts to complete it.
 */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;
    BlockRequest *rq = &acb->req;

    if (acb->is_write) {
        rq->error = bdrv_co_pwritev(acb->child, rq->offset,
                                    rq->qiov->size, rq->qiov, rq->flags);
    } else {
        rq->error = bdrv_co_preadv(acb->child, rq->offset,
                                   rq->qiov->size, rq->qiov, rq->flags);
    }

    bdrv_co_complete(acb);
}
/*
 * Start an asynchronous vectored read or write on @child by running
 * bdrv_co_do_rw() in a new coroutine.
 *
 * @child:    child the request is issued on
 * @offset:   starting offset of the request
 * @qiov:     I/O vector; its size is used as the request length
 * @flags:    request flags forwarded to bdrv_co_preadv()/bdrv_co_pwritev()
 * @cb:       completion callback, handed to qemu_aio_get()
 * @opaque:   argument for @cb, handed to qemu_aio_get()
 * @is_write: true to write, false to read
 *
 * Returns the AIOCB allocated for the request.
 */
static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
int64_t offset,
QEMUIOVector *qiov,
BdrvRequestFlags flags,
BlockCompletionFunc *cb,
void *opaque,
bool is_write)
{
Coroutine *co;
BlockAIOCBCoroutine *acb;
/* Matched by bdrv_co_complete's bdrv_dec_in_flight. */
bdrv_inc_in_flight(child->bs);
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque);
acb->child = child;
/* While need_bh is set bdrv_co_complete() is a no-op, so the request
 * cannot be completed from inside bdrv_coroutine_enter() below. */
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
acb->req.offset = offset;
acb->req.qiov = qiov;
acb->req.flags = flags;
acb->is_write = is_write;
co = qemu_coroutine_create(bdrv_co_do_rw, acb);
bdrv_coroutine_enter(child->bs, co);
/* Clears need_bh; if the coroutine already stored a result, completion
 * is delivered from a bottom half. */
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
}
/*
 * Coroutine entry point for bdrv_aio_flush(): flush the node attached to the
 * AIOCB, store the result as the request status, and attempt to complete the
 * request.
 */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;

    acb->req.error = bdrv_co_flush(acb->common.bs);
    bdrv_co_complete(acb);
}
/*
 * Start an asynchronous flush of @bs by running bdrv_aio_flush_co_entry()
 * in a new coroutine.
 *
 * @bs:     node to flush
 * @cb:     completion callback, handed to qemu_aio_get()
 * @opaque: argument for @cb, handed to qemu_aio_get()
 *
 * Returns the AIOCB allocated for the request.
 */
BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
trace_bdrv_aio_flush(bs, opaque);
Coroutine *co;
BlockAIOCBCoroutine *acb;
/* Matched by bdrv_co_complete's bdrv_dec_in_flight. */
bdrv_inc_in_flight(bs);
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
/* While need_bh is set bdrv_co_complete() is a no-op, so the request
 * cannot be completed from inside bdrv_coroutine_enter() below. */
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
bdrv_coroutine_enter(bs, co);
/* Clears need_bh; if the coroutine already stored a result, completion
 * is delivered from a bottom half. */
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
}
/**************************************************************/
/* Coroutine block device emulation */
@@ -2305,14 +2177,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto early_exit;
}
current_gen = bs->write_gen;
qemu_co_mutex_lock(&bs->reqs_lock);
current_gen = atomic_read(&bs->write_gen);
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
qemu_co_queue_wait(&bs->flush_queue, NULL);
qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
}
/* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
qemu_co_mutex_unlock(&bs->reqs_lock);
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
@@ -2384,9 +2259,12 @@ out:
if (ret == 0) {
bs->flushed_gen = current_gen;
}
qemu_co_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
qemu_co_mutex_unlock(&bs->reqs_lock);
early_exit:
bdrv_dec_in_flight(bs);
@@ -2416,18 +2294,18 @@ int bdrv_flush(BlockDriverState *bs)
typedef struct DiscardCo {
BlockDriverState *bs;
int64_t offset;
int count;
int bytes;
int ret;
} DiscardCo;
static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
DiscardCo *rwco = opaque;
rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->count);
rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->bytes);
}
int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
int count)
int bytes)
{
BdrvTrackedRequest req;
int max_pdiscard, ret;
@@ -2437,7 +2315,11 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
return -ENOMEDIUM;
}
ret = bdrv_check_byte_request(bs, offset, count);
if (bdrv_has_readonly_bitmaps(bs)) {
return -EPERM;
}
ret = bdrv_check_byte_request(bs, offset, bytes);
if (ret < 0) {
return ret;
} else if (bs->read_only) {
@@ -2462,10 +2344,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
assert(align % bs->bl.request_alignment == 0);
head = offset % align;
tail = (offset + count) % align;
tail = (offset + bytes) % align;
bdrv_inc_in_flight(bs);
tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
if (ret < 0) {
@@ -2476,13 +2358,12 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
align);
assert(max_pdiscard >= bs->bl.request_alignment);
while (count > 0) {
int ret;
int num = count;
while (bytes > 0) {
int num = bytes;
if (head) {
/* Make small requests to get to alignment boundaries. */
num = MIN(count, align - head);
num = MIN(bytes, align - head);
if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
num %= bs->bl.request_alignment;
}
@@ -2526,11 +2407,11 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
}
offset += num;
count -= num;
bytes -= num;
}
ret = 0;
out:
++bs->write_gen;
atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
req.bytes >> BDRV_SECTOR_BITS);
tracked_request_end(&req);
@@ -2538,13 +2419,13 @@ out:
return ret;
}
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
Coroutine *co;
DiscardCo rwco = {
.bs = bs,
.offset = offset,
.count = count,
.bytes = bytes,
.ret = NOT_DONE,
};
@@ -2657,7 +2538,7 @@ void bdrv_io_plug(BlockDriverState *bs)
bdrv_io_plug(child->bs);
}
if (bs->io_plugged++ == 0) {
if (atomic_fetch_inc(&bs->io_plugged) == 0) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_io_plug) {
drv->bdrv_io_plug(bs);
@@ -2670,7 +2551,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
BdrvChild *child;
assert(bs->io_plugged);
if (--bs->io_plugged == 0) {
if (atomic_fetch_dec(&bs->io_plugged) == 1) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_io_unplug) {
drv->bdrv_io_unplug(bs);

View File

@@ -1116,14 +1116,14 @@ iscsi_getlength(BlockDriverState *bs)
}
static int
coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
return -ENOTSUP;
}
@@ -1133,7 +1133,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
}
list.lba = offset / iscsilun->block_size;
list.num = count / iscsilun->block_size;
list.num = bytes / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
@@ -1174,7 +1174,7 @@ retry:
}
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
bytes >> BDRV_SECTOR_BITS);
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
@@ -1183,7 +1183,7 @@ out_unlock:
static int
coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags)
int bytes, BdrvRequestFlags flags)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
@@ -1192,7 +1192,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
bool use_16_for_ws = iscsilun->use_16_for_rw;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
return -ENOTSUP;
}
@@ -1215,7 +1215,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
}
lba = offset / iscsilun->block_size;
nb_blocks = count / iscsilun->block_size;
nb_blocks = bytes / iscsilun->block_size;
if (iscsilun->zeroblock == NULL) {
iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
@@ -1273,17 +1273,17 @@ retry:
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
bytes >> BDRV_SECTOR_BITS);
r = iTask.err_code;
goto out_unlock;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
bytes >> BDRV_SECTOR_BITS);
} else {
iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
bytes >> BDRV_SECTOR_BITS);
}
out_unlock:
@@ -1732,6 +1732,10 @@ static QemuOptsList runtime_opts = {
.name = "timeout",
.type = QEMU_OPT_NUMBER,
},
{
.name = "filename",
.type = QEMU_OPT_STRING,
},
{ /* end of list */ }
},
};
@@ -1747,12 +1751,27 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
char *initiator_name = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *transport_name, *portal, *target;
const char *transport_name, *portal, *target, *filename;
#if LIBISCSI_API_VERSION >= (20160603)
enum iscsi_transport_type transport;
#endif
int i, ret = 0, timeout = 0, lun;
/* If we are given a filename, parse the filename, with precedence given to
* filename encoded options */
filename = qdict_get_try_str(options, "filename");
if (filename) {
warn_report("'filename' option specified. "
"This is an unsupported option, and may be deprecated "
"in the future");
iscsi_parse_filename(filename, options, &local_err);
if (local_err) {
ret = -EINVAL;
error_propagate(errp, local_err);
goto exit;
}
}
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -1967,6 +1986,7 @@ out:
}
memset(iscsilun, 0, sizeof(IscsiLun));
}
exit:
return ret;
}
@@ -2059,11 +2079,18 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
}
}
static int iscsi_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
Error *local_err = NULL;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
if (iscsilun->type != TYPE_DISK) {
error_setg(errp, "Cannot resize non-disk iSCSI devices");
return -ENOTSUP;

View File

@@ -24,9 +24,8 @@
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
#define DEFAULT_MIRROR_BUF_SIZE \
(MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
#define MAX_IO_BYTES (1 << 20) /* 1 Mb */
#define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES)
/* The mirroring buffer is a list of granularity-sized chunks.
* Free chunks are organized in a list.
@@ -67,11 +66,11 @@ typedef struct MirrorBlockJob {
uint64_t last_pause_ns;
unsigned long *in_flight_bitmap;
int in_flight;
int64_t sectors_in_flight;
int64_t bytes_in_flight;
int ret;
bool unmap;
bool waiting_for_io;
int target_cluster_sectors;
int target_cluster_size;
int max_iov;
bool initial_zeroing_ongoing;
} MirrorBlockJob;
@@ -79,8 +78,8 @@ typedef struct MirrorBlockJob {
typedef struct MirrorOp {
MirrorBlockJob *s;
QEMUIOVector qiov;
int64_t sector_num;
int nb_sectors;
int64_t offset;
uint64_t bytes;
} MirrorOp;
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
@@ -101,12 +100,12 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
MirrorBlockJob *s = op->s;
struct iovec *iov;
int64_t chunk_num;
int i, nb_chunks, sectors_per_chunk;
int i, nb_chunks;
trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
trace_mirror_iteration_done(s, op->offset, op->bytes, ret);
s->in_flight--;
s->sectors_in_flight -= op->nb_sectors;
s->bytes_in_flight -= op->bytes;
iov = op->qiov.iov;
for (i = 0; i < op->qiov.niov; i++) {
MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
@@ -114,16 +113,15 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
s->buf_free_count++;
}
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
chunk_num = op->sector_num / sectors_per_chunk;
nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk);
chunk_num = op->offset / s->granularity;
nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
if (ret >= 0) {
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
if (!s->initial_zeroing_ongoing) {
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
s->common.offset += op->bytes;
}
}
qemu_iovec_destroy(&op->qiov);
@@ -143,7 +141,8 @@ static void mirror_write_complete(void *opaque, int ret)
if (ret < 0) {
BlockErrorAction action;
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
op->bytes >> BDRV_SECTOR_BITS);
action = mirror_error_action(s, false, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -162,7 +161,8 @@ static void mirror_read_complete(void *opaque, int ret)
if (ret < 0) {
BlockErrorAction action;
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
op->bytes >> BDRV_SECTOR_BITS);
action = mirror_error_action(s, true, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -170,56 +170,53 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_iteration_done(op, ret);
} else {
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
blk_aio_pwritev(s->target, op->offset, &op->qiov,
0, mirror_write_complete, op);
}
aio_context_release(blk_get_aio_context(s->common.blk));
}
static inline void mirror_clip_sectors(MirrorBlockJob *s,
int64_t sector_num,
int *nb_sectors)
/* Clip bytes relative to offset to not exceed end-of-file */
static inline int64_t mirror_clip_bytes(MirrorBlockJob *s,
int64_t offset,
int64_t bytes)
{
*nb_sectors = MIN(*nb_sectors,
s->bdev_length / BDRV_SECTOR_SIZE - sector_num);
return MIN(bytes, s->bdev_length - offset);
}
/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
* return the offset of the adjusted tail sector against original. */
static int mirror_cow_align(MirrorBlockJob *s,
int64_t *sector_num,
int *nb_sectors)
/* Round offset and/or bytes to target cluster if COW is needed, and
* return the offset of the adjusted tail against original. */
static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
uint64_t *bytes)
{
bool need_cow;
int ret = 0;
int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
int64_t align_sector_num = *sector_num;
int align_nb_sectors = *nb_sectors;
int max_sectors = chunk_sectors * s->max_iov;
int64_t align_offset = *offset;
unsigned int align_bytes = *bytes;
int max_bytes = s->granularity * s->max_iov;
need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
assert(*bytes < INT_MAX);
need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
s->cow_bitmap);
if (need_cow) {
bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
*nb_sectors, &align_sector_num,
&align_nb_sectors);
bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
&align_offset, &align_bytes);
}
if (align_nb_sectors > max_sectors) {
align_nb_sectors = max_sectors;
if (align_bytes > max_bytes) {
align_bytes = max_bytes;
if (need_cow) {
align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors,
s->target_cluster_sectors);
align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size);
}
}
/* Clipping may result in align_nb_sectors unaligned to chunk boundary, but
/* Clipping may result in align_bytes unaligned to chunk boundary, but
* that doesn't matter because it's already the end of source image. */
mirror_clip_sectors(s, align_sector_num, &align_nb_sectors);
align_bytes = mirror_clip_bytes(s, align_offset, align_bytes);
ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors);
*sector_num = align_sector_num;
*nb_sectors = align_nb_sectors;
ret = align_offset + align_bytes - (*offset + *bytes);
*offset = align_offset;
*bytes = align_bytes;
assert(ret >= 0);
return ret;
}
@@ -233,50 +230,51 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
}
/* Submit async read while handling COW.
* Returns: The number of sectors copied after and including sector_num,
* excluding any sectors copied prior to sector_num due to alignment.
* This will be nb_sectors if no alignment is necessary, or
* (new_end - sector_num) if tail is rounded up or down due to
* Returns: The number of bytes copied after and including offset,
* excluding any bytes copied prior to offset due to alignment.
* This will be @bytes if no alignment is necessary, or
* (new_end - offset) if tail is rounded up or down due to
* alignment or buffer limit.
*/
static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
int nb_sectors)
static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
uint64_t bytes)
{
BlockBackend *source = s->common.blk;
int sectors_per_chunk, nb_chunks;
int ret;
int nb_chunks;
uint64_t ret;
MirrorOp *op;
int max_sectors;
uint64_t max_bytes;
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
max_sectors = sectors_per_chunk * s->max_iov;
max_bytes = s->granularity * s->max_iov;
/* We can only handle as much as buf_size at a time. */
nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
nb_sectors = MIN(max_sectors, nb_sectors);
assert(nb_sectors);
ret = nb_sectors;
bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
assert(bytes);
assert(bytes < BDRV_REQUEST_MAX_BYTES);
ret = bytes;
if (s->cow_bitmap) {
ret += mirror_cow_align(s, &sector_num, &nb_sectors);
ret += mirror_cow_align(s, &offset, &bytes);
}
assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
/* The sector range must meet granularity because:
assert(bytes <= s->buf_size);
/* The offset is granularity-aligned because:
* 1) Caller passes in aligned values;
* 2) mirror_cow_align is used only when target cluster is larger. */
assert(!(sector_num % sectors_per_chunk));
nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
assert(QEMU_IS_ALIGNED(offset, s->granularity));
/* The range is sector-aligned, since bdrv_getlength() rounds up. */
assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
nb_chunks = DIV_ROUND_UP(bytes, s->granularity);
while (s->buf_free_count < nb_chunks) {
trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
trace_mirror_yield_in_flight(s, offset, s->in_flight);
mirror_wait_for_io(s);
}
/* Allocate a MirrorOp that is used as an AIO callback. */
op = g_new(MirrorOp, 1);
op->s = s;
op->sector_num = sector_num;
op->nb_sectors = nb_sectors;
op->offset = offset;
op->bytes = bytes;
/* Now make a QEMUIOVector taking enough granularity-sized chunks
* from s->buf_free.
@@ -284,7 +282,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
qemu_iovec_init(&op->qiov, nb_chunks);
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
size_t remaining = bytes - op->qiov.size;
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
@@ -293,17 +291,16 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
/* Copy the dirty cluster. */
s->in_flight++;
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
s->bytes_in_flight += bytes;
trace_mirror_one_iteration(s, offset, bytes);
blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0,
mirror_read_complete, op);
blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op);
return ret;
}
static void mirror_do_zero_or_discard(MirrorBlockJob *s,
int64_t sector_num,
int nb_sectors,
int64_t offset,
uint64_t bytes,
bool is_discard)
{
MirrorOp *op;
@@ -312,19 +309,17 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
* so the freeing in mirror_iteration_done is nop. */
op = g_new0(MirrorOp, 1);
op->s = s;
op->sector_num = sector_num;
op->nb_sectors = nb_sectors;
op->offset = offset;
op->bytes = bytes;
s->in_flight++;
s->sectors_in_flight += nb_sectors;
s->bytes_in_flight += bytes;
if (is_discard) {
blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS,
op->nb_sectors << BDRV_SECTOR_BITS,
mirror_write_complete, op);
blk_aio_pdiscard(s->target, offset,
op->bytes, mirror_write_complete, op);
} else {
blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
op->nb_sectors * BDRV_SECTOR_SIZE,
s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
blk_aio_pwrite_zeroes(s->target, offset,
op->bytes, s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
mirror_write_complete, op);
}
}
@@ -332,27 +327,28 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->source;
int64_t sector_num, first_chunk;
int64_t offset, first_chunk;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
MAX_IO_SECTORS);
int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
sector_num = bdrv_dirty_iter_next(s->dbi);
if (sector_num < 0) {
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
if (offset < 0) {
bdrv_set_dirty_iter(s->dbi, 0);
sector_num = bdrv_dirty_iter_next(s->dbi);
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
assert(sector_num >= 0);
offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) *
BDRV_SECTOR_SIZE);
assert(offset >= 0);
}
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
first_chunk = sector_num / sectors_per_chunk;
first_chunk = offset / s->granularity;
while (test_bit(first_chunk, s->in_flight_bitmap)) {
trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
trace_mirror_yield_in_flight(s, offset, s->in_flight);
mirror_wait_for_io(s);
}
@@ -360,25 +356,27 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
/* Find the number of consecutive dirty chunks following the first dirty
* one, and wait for in flight requests in them. */
while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
while (nb_chunks * s->granularity < s->buf_size) {
int64_t next_dirty;
int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
int64_t next_chunk = next_sector / sectors_per_chunk;
if (next_sector >= end ||
!bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
int64_t next_offset = offset + nb_chunks * s->granularity;
int64_t next_chunk = next_offset / s->granularity;
if (next_offset >= s->bdev_length ||
!bdrv_get_dirty_locked(source, s->dirty_bitmap,
next_offset >> BDRV_SECTOR_BITS)) {
break;
}
if (test_bit(next_chunk, s->in_flight_bitmap)) {
break;
}
next_dirty = bdrv_dirty_iter_next(s->dbi);
if (next_dirty > next_sector || next_dirty < 0) {
next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
if (next_dirty > next_offset || next_dirty < 0) {
/* The bitmap iterator's cache is stale, refresh it */
bdrv_set_dirty_iter(s->dbi, next_sector);
next_dirty = bdrv_dirty_iter_next(s->dbi);
bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS);
next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
}
assert(next_dirty == next_sector);
assert(next_dirty == next_offset);
nb_chunks++;
}
@@ -386,12 +384,16 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* calling bdrv_get_block_status_above could yield - if some blocks are
* marked dirty in this window, we need to know.
*/
bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
nb_chunks * sectors_per_chunk);
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS,
nb_chunks * sectors_per_chunk);
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
while (nb_chunks > 0 && offset < s->bdev_length) {
int64_t ret;
int io_sectors, io_sectors_acct;
int io_sectors;
unsigned int io_bytes;
int64_t io_bytes_acct;
BlockDriverState *file;
enum MirrorMethod {
MIRROR_METHOD_COPY,
@@ -399,27 +401,28 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
MIRROR_METHOD_DISCARD
} mirror_method = MIRROR_METHOD_COPY;
assert(!(sector_num % sectors_per_chunk));
ret = bdrv_get_block_status_above(source, NULL, sector_num,
assert(!(offset % s->granularity));
ret = bdrv_get_block_status_above(source, NULL,
offset >> BDRV_SECTOR_BITS,
nb_chunks * sectors_per_chunk,
&io_sectors, &file);
io_bytes = io_sectors * BDRV_SECTOR_SIZE;
if (ret < 0) {
io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
} else if (ret & BDRV_BLOCK_DATA) {
io_sectors = MIN(io_sectors, max_io_sectors);
io_bytes = MIN(io_bytes, max_io_bytes);
}
io_sectors -= io_sectors % sectors_per_chunk;
if (io_sectors < sectors_per_chunk) {
io_sectors = sectors_per_chunk;
io_bytes -= io_bytes % s->granularity;
if (io_bytes < s->granularity) {
io_bytes = s->granularity;
} else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
int64_t target_sector_num;
int target_nb_sectors;
bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
io_sectors, &target_sector_num,
&target_nb_sectors);
if (target_sector_num == sector_num &&
target_nb_sectors == io_sectors) {
int64_t target_offset;
unsigned int target_bytes;
bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
&target_offset, &target_bytes);
if (target_offset == offset &&
target_bytes == io_bytes) {
mirror_method = ret & BDRV_BLOCK_ZERO ?
MIRROR_METHOD_ZERO :
MIRROR_METHOD_DISCARD;
@@ -427,7 +430,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}
while (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
trace_mirror_yield_in_flight(s, offset, s->in_flight);
mirror_wait_for_io(s);
}
@@ -435,30 +438,29 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
return 0;
}
mirror_clip_sectors(s, sector_num, &io_sectors);
io_bytes = mirror_clip_bytes(s, offset, io_bytes);
switch (mirror_method) {
case MIRROR_METHOD_COPY:
io_sectors = mirror_do_read(s, sector_num, io_sectors);
io_sectors_acct = io_sectors;
io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes);
break;
case MIRROR_METHOD_ZERO:
case MIRROR_METHOD_DISCARD:
mirror_do_zero_or_discard(s, sector_num, io_sectors,
mirror_do_zero_or_discard(s, offset, io_bytes,
mirror_method == MIRROR_METHOD_DISCARD);
if (write_zeroes_ok) {
io_sectors_acct = 0;
io_bytes_acct = 0;
} else {
io_sectors_acct = io_sectors;
io_bytes_acct = io_bytes;
}
break;
default:
abort();
}
assert(io_sectors);
sector_num += io_sectors;
nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
assert(io_bytes);
offset += io_bytes;
nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
delay_ns = ratelimit_calculate_delay(&s->limit, io_bytes_acct);
}
}
return delay_ns;
@@ -506,6 +508,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
/* Make sure that the source BDS doesn't go away before we call
* block_job_completed(). */
bdrv_ref(src);
@@ -514,7 +518,12 @@ static void mirror_exit(BlockJob *job, void *opaque)
/* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
* inserting target_bs at s->to_replace, where we might not be able to get
* these permissions. */
* these permissions.
*
* Note that blk_unref() alone doesn't necessarily drop permissions because
* we might be running nested inside mirror_drain(), which takes an extra
* reference, so use an explicit blk_set_perm() first. */
blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort);
blk_unref(s->target);
s->target = NULL;
@@ -612,6 +621,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
int64_t count;
end = s->bdev_length / BDRV_SECTOR_SIZE;
@@ -640,7 +650,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
continue;
}
mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, false);
sector_num += nb_sectors;
}
@@ -660,11 +671,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0;
}
ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, &count);
if (ret < 0) {
return ret;
}
/* TODO: Relax this once bdrv_is_allocated_above and dirty
* bitmaps no longer require sector alignment. */
assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
n = count >> BDRV_SECTOR_BITS;
assert(n > 0);
if (ret == 1) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
@@ -700,7 +716,6 @@ static void coroutine_fn mirror_run(void *opaque)
char backing_filename[2]; /* we only need 2 characters because we are only
checking for an empty string */
int ret = 0;
int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
goto immediate_exit;
@@ -724,7 +739,8 @@ static void coroutine_fn mirror_run(void *opaque)
}
if (s->bdev_length > base_length) {
ret = blk_truncate(s->target, s->bdev_length, NULL);
ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF,
NULL);
if (ret < 0) {
goto immediate_exit;
}
@@ -752,14 +768,15 @@ static void coroutine_fn mirror_run(void *opaque)
bdrv_get_backing_filename(target_bs, backing_filename,
sizeof(backing_filename));
if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
target_cluster_size = bdi.cluster_size;
s->target_cluster_size = bdi.cluster_size;
} else {
s->target_cluster_size = BDRV_SECTOR_SIZE;
}
if (backing_filename[0] && !target_bs->backing
&& s->granularity < target_cluster_size) {
s->buf_size = MAX(s->buf_size, target_cluster_size);
if (backing_filename[0] && !target_bs->backing &&
s->granularity < s->target_cluster_size) {
s->buf_size = MAX(s->buf_size, s->target_cluster_size);
s->cow_bitmap = bitmap_new(length);
}
s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -795,10 +812,10 @@ static void coroutine_fn mirror_run(void *opaque)
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
/* s->common.offset contains the number of bytes already processed so
* far, cnt is the number of dirty sectors remaining and
* s->sectors_in_flight is the number of sectors currently being
* s->bytes_in_flight is the number of bytes currently being
* processed; together those are the current total operation length */
s->common.len = s->common.offset +
(cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;
s->common.len = s->common.offset + s->bytes_in_flight +
cnt * BDRV_SECTOR_SIZE;
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -810,7 +827,8 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE,
s->buf_free_count, s->in_flight);
mirror_wait_for_io(s);
continue;
} else if (cnt != 0) {
@@ -851,7 +869,7 @@ static void coroutine_fn mirror_run(void *opaque)
* whether to switch to target check one last time if I/O has
* come in the meanwhile, and if not flush the data to disk.
*/
trace_mirror_before_drain(s, cnt);
trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE);
bdrv_drained_begin(bs);
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
@@ -870,7 +888,8 @@ static void coroutine_fn mirror_run(void *opaque)
}
ret = 0;
trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE,
s->synced, delay_ns);
if (!s->synced) {
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
@@ -899,7 +918,6 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_dirty_iter_free(s->dbi);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
data = g_malloc(sizeof(*data));
data->ret = ret;
@@ -918,7 +936,7 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static void mirror_complete(BlockJob *job, Error **errp)
@@ -1047,20 +1065,20 @@ static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
int64_t offset, int bytes, BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
}
static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
int64_t offset, int bytes)
{
return bdrv_co_pdiscard(bs->backing->bs, offset, count);
return bdrv_co_pdiscard(bs->backing->bs, offset, bytes);
}
static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
@@ -1130,6 +1148,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
}
assert ((granularity & (granularity - 1)) == 0);
/* Granularity must be large enough for sector-based dirty bitmap */
assert(granularity >= BDRV_SECTOR_SIZE);
if (buf_size < 0) {
error_setg(errp, "Invalid parameter 'buf-size'");
@@ -1252,7 +1272,7 @@ fail:
g_free(s->replaces);
blk_unref(s->target);
block_job_unref(&s->common);
block_job_early_fail(&s->common);
}
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,

View File

@@ -28,6 +28,7 @@
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "nbd-client.h"
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
@@ -70,10 +71,14 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
NBDClientSession *s = opaque;
uint64_t i;
int ret;
Error *local_err = NULL;
for (;;) {
assert(s->reply.handle == 0);
ret = nbd_receive_reply(s->ioc, &s->reply);
ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
if (ret < 0) {
error_report_err(local_err);
}
if (ret <= 0) {
break;
}
@@ -114,6 +119,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
int rc, ret, i;
qemu_co_mutex_lock(&s->send_mutex);
while (s->in_flight == MAX_NBD_REQUESTS) {
qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
}
s->in_flight++;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i] == NULL) {
@@ -135,8 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
false);
ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
NULL);
if (ret != request->len) {
rc = -EIO;
}
@@ -164,8 +173,8 @@ static void nbd_co_receive_reply(NBDClientSession *s,
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
true);
ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
NULL);
if (ret != request->len) {
reply->error = EIO;
}
@@ -176,20 +185,6 @@ static void nbd_co_receive_reply(NBDClientSession *s,
}
}
static void nbd_coroutine_start(NBDClientSession *s,
NBDRequest *request)
{
/* Poor man's semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight == MAX_NBD_REQUESTS) {
qemu_co_queue_wait(&s->free_sema, NULL);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
}
static void nbd_coroutine_end(BlockDriverState *bs,
NBDRequest *request)
{
@@ -197,13 +192,16 @@ static void nbd_coroutine_end(BlockDriverState *bs,
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
/* Kick the read_reply_co to get the next reply. */
if (s->read_reply_co) {
aio_co_wake(s->read_reply_co);
}
qemu_co_mutex_lock(&s->send_mutex);
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
qemu_co_mutex_unlock(&s->send_mutex);
}
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
@@ -221,7 +219,6 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
assert(bytes <= NBD_MAX_BUFFER_SIZE);
assert(!flags);
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
@@ -245,13 +242,12 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
ssize_t ret;
if (flags & BDRV_REQ_FUA) {
assert(client->nbdflags & NBD_FLAG_SEND_FUA);
assert(client->info.flags & NBD_FLAG_SEND_FUA);
request.flags |= NBD_CMD_FLAG_FUA;
}
assert(bytes <= NBD_MAX_BUFFER_SIZE);
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, qiov);
if (ret < 0) {
reply.error = -ret;
@@ -263,30 +259,29 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
}
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags)
int bytes, BdrvRequestFlags flags)
{
ssize_t ret;
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_WRITE_ZEROES,
.from = offset,
.len = count,
.len = bytes,
};
NBDReply reply;
if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
return -ENOTSUP;
}
if (flags & BDRV_REQ_FUA) {
assert(client->nbdflags & NBD_FLAG_SEND_FUA);
assert(client->info.flags & NBD_FLAG_SEND_FUA);
request.flags |= NBD_CMD_FLAG_FUA;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
request.flags |= NBD_CMD_FLAG_NO_HOLE;
}
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
@@ -304,14 +299,13 @@ int nbd_client_co_flush(BlockDriverState *bs)
NBDReply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
return 0;
}
request.from = 0;
request.len = 0;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
@@ -322,22 +316,21 @@ int nbd_client_co_flush(BlockDriverState *bs)
return -reply.error;
}
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_TRIM,
.from = offset,
.len = count,
.len = bytes,
};
NBDReply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
@@ -352,14 +345,14 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
aio_co_schedule(new_context, client->read_reply_co);
}
@@ -391,22 +384,24 @@ int nbd_client_init(BlockDriverState *bs,
logout("session init %s\n", export);
qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
client->info.request_sizes = true;
ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
&client->nbdflags,
tlscreds, hostname,
&client->ioc,
&client->size, errp);
&client->ioc, &client->info, errp);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
return ret;
}
if (client->nbdflags & NBD_FLAG_SEND_FUA) {
if (client->info.flags & NBD_FLAG_SEND_FUA) {
bs->supported_write_flags = BDRV_REQ_FUA;
bs->supported_zero_flags |= BDRV_REQ_FUA;
}
if (client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) {
if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
}
if (client->info.min_block > bs->bl.request_alignment) {
bs->bl.request_alignment = client->info.min_block;
}
qemu_co_mutex_init(&client->send_mutex);
qemu_co_queue_init(&client->free_sema);

View File

@@ -20,8 +20,7 @@
typedef struct NBDClientSession {
QIOChannelSocket *sioc; /* The master data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
uint16_t nbdflags;
off_t size;
NBDExportInfo info;
CoMutex send_mutex;
CoQueue free_sema;
@@ -42,12 +41,12 @@ int nbd_client_init(BlockDriverState *bs,
Error **errp);
void nbd_client_close(BlockDriverState *bs);
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags);
int bytes, BdrvRequestFlags flags);
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);

View File

@@ -37,7 +37,6 @@
#include "qapi/qobject-output-visitor.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
#include "qemu/cutils.h"
@@ -65,11 +64,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
}
/* transport */
if (!strcmp(uri->scheme, "nbd")) {
if (!g_strcmp0(uri->scheme, "nbd")) {
is_unix = false;
} else if (!strcmp(uri->scheme, "nbd+tcp")) {
} else if (!g_strcmp0(uri->scheme, "nbd+tcp")) {
is_unix = false;
} else if (!strcmp(uri->scheme, "nbd+unix")) {
} else if (!g_strcmp0(uri->scheme, "nbd+unix")) {
is_unix = true;
} else {
ret = -EINVAL;
@@ -473,9 +472,17 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
NBDClientSession *s = nbd_get_client_session(bs);
uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
bs->bl.max_pdiscard = max;
bs->bl.max_pwrite_zeroes = max;
bs->bl.max_transfer = max;
if (s->info.opt_block &&
s->info.opt_block > bs->bl.opt_transfer) {
bs->bl.opt_transfer = s->info.opt_block;
}
}
static void nbd_close(BlockDriverState *bs)
@@ -493,7 +500,7 @@ static int64_t nbd_getlength(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
return s->client.size;
return s->client.info.size;
}
static void nbd_detach_aio_context(BlockDriverState *bs)

View File

@@ -36,7 +36,6 @@
#include "qemu/cutils.h"
#include "sysemu/sysemu.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
#include "qapi-visit.h"
#include "qapi/qobject-input-visitor.h"
@@ -83,7 +82,7 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
error_setg(errp, "Invalid URI specified");
goto out;
}
if (strcmp(uri->scheme, "nfs") != 0) {
if (g_strcmp0(uri->scheme, "nfs") != 0) {
error_setg(errp, "URI scheme must be 'nfs'");
goto out;
}
@@ -559,8 +558,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
}
client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
error_report("NFS Warning: Truncating NFS readahead "
"size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
warn_report("Truncating NFS readahead size to %d",
QEMU_NFS_MAX_READAHEAD_SIZE);
client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
}
nfs_set_readahead(client->context, client->readahead);
@@ -580,8 +579,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
}
client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
error_report("NFS Warning: Truncating NFS pagecache "
"size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
warn_report("Truncating NFS pagecache size to %d pages",
QEMU_NFS_MAX_PAGECACHE_SIZE);
client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
}
nfs_set_pagecache(client->context, client->pagecache);
@@ -596,8 +595,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
/* limit the maximum debug level to avoid potential flooding
* of our log files. */
if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
error_report("NFS Warning: Limiting NFS debug level "
"to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
warn_report("Limiting NFS debug level to %d",
QEMU_NFS_MAX_DEBUG_LEVEL);
client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
}
nfs_set_debug(client->context, client->debug);
@@ -730,7 +729,9 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
if (task->ret < 0) {
error_report("NFS Error: %s", nfs_get_error(nfs));
}
task->complete = 1;
/* Set task->complete before reading bs->wakeup. */
atomic_mb_set(&task->complete, 1);
bdrv_wakeup(task->bs);
}
@@ -758,11 +759,18 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
return (task.ret < 0 ? task.ret : st.st_blocks * 512);
}
static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
static int nfs_file_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
NFSClient *client = bs->opaque;
int ret;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
ret = nfs_ftruncate(client->context, client->fh, offset);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to truncate file");

View File

@@ -224,7 +224,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
} else {
ret = bdrv_truncate(bs->file,
(s->data_end + space) << BDRV_SECTOR_BITS,
NULL);
PREALLOC_MODE_OFF, NULL);
}
if (ret < 0) {
return ret;
@@ -458,7 +458,8 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
res->leaks += count;
if (fix & BDRV_FIX_LEAKS) {
Error *local_err = NULL;
ret = bdrv_truncate(bs->file, res->image_end_offset, &local_err);
ret = bdrv_truncate(bs->file, res->image_end_offset,
PREALLOC_MODE_OFF, &local_err);
if (ret < 0) {
error_report_err(local_err);
res->check_errors++;
@@ -507,7 +508,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
blk_set_allow_write_beyond_eof(file, true);
ret = blk_truncate(file, 0, errp);
ret = blk_truncate(file, 0, PREALLOC_MODE_OFF, errp);
if (ret < 0) {
goto exit;
}
@@ -699,7 +700,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
}
if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), NULL) != 0) {
bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs),
PREALLOC_MODE_OFF, NULL) != 0) {
s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
}
@@ -742,7 +744,8 @@ static void parallels_close(BlockDriverState *bs)
}
if (bs->open_flags & BDRV_O_RDWR) {
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, NULL);
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS,
PREALLOC_MODE_OFF, NULL);
}
g_free(s->bat_dirty_bmap);

View File

@@ -45,7 +45,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->ro = bs->read_only;
info->drv = g_strdup(bs->drv->format_name);
info->encrypted = bs->encrypted;
info->encryption_key_missing = bdrv_key_required(bs);
info->encryption_key_missing = false;
info->cache = g_new(BlockdevCacheInfo, 1);
*info->cache = (BlockdevCacheInfo) {
@@ -322,11 +322,21 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
{
BlockInfo *info = g_malloc0(sizeof(*info));
BlockDriverState *bs = blk_bs(blk);
char *qdev;
info->device = g_strdup(blk_name(blk));
info->type = g_strdup("unknown");
info->locked = blk_dev_is_medium_locked(blk);
info->removable = blk_dev_has_removable_media(blk);
qdev = blk_get_attached_dev_id(blk);
if (qdev && *qdev) {
info->has_qdev = true;
info->qdev = qdev;
} else {
g_free(qdev);
}
if (blk_dev_has_tray(blk)) {
info->has_tray_open = true;
info->tray_open = blk_dev_is_tray_open(blk);
@@ -441,7 +451,7 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
s->node_name = g_strdup(bdrv_get_node_name(bs));
}
s->stats->wr_highest_offset = bs->wr_highest_offset;
s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
if (bs->file) {
s->has_parent = true;
@@ -462,8 +472,14 @@ BlockInfoList *qmp_query_block(Error **errp)
BlockBackend *blk;
Error *local_err = NULL;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
BlockInfoList *info = g_malloc0(sizeof(*info));
for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
BlockInfoList *info;
if (!*blk_name(blk) && !blk_get_attached_dev(blk)) {
continue;
}
info = g_malloc0(sizeof(*info));
bdrv_query_info(blk, &info->value, &local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -595,9 +611,11 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
int comp_indent, QObject *obj)
{
switch (qobject_type(obj)) {
case QTYPE_QINT: {
QInt *value = qobject_to_qint(obj);
func_fprintf(f, "%" PRId64, qint_get_int(value));
case QTYPE_QNUM: {
QNum *value = qobject_to_qnum(obj);
char *tmp = qnum_to_string(value);
func_fprintf(f, "%s", tmp);
g_free(tmp);
break;
}
case QTYPE_QSTRING: {
@@ -615,11 +633,6 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
dump_qlist(func_fprintf, f, comp_indent, value);
break;
}
case QTYPE_QFLOAT: {
QFloat *value = qobject_to_qfloat(obj);
func_fprintf(f, "%g", qfloat_get_double(value));
break;
}
case QTYPE_QBOOL: {
QBool *value = qobject_to_qbool(obj);
func_fprintf(f, "%s", qbool_get_bool(value) ? "true" : "false");

View File

@@ -31,8 +31,10 @@
#include "qemu/bswap.h"
#include <zlib.h>
#include "qapi/qmp/qerror.h"
#include "crypto/cipher.h"
#include "qapi/qmp/qstring.h"
#include "crypto/block.h"
#include "migration/blocker.h"
#include "block/crypto.h"
/**************************************************************/
/* QEMU COW block driver with compression and encryption support */
@@ -77,7 +79,7 @@ typedef struct BDRVQcowState {
uint8_t *cluster_cache;
uint8_t *cluster_data;
uint64_t cluster_cache_offset;
QCryptoCipher *cipher; /* NULL if no key yet */
QCryptoBlock *crypto; /* Disk encryption format driver */
uint32_t crypt_method_header;
CoMutex lock;
Error *migration_blocker;
@@ -97,6 +99,15 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static QemuOptsList qcow_runtime_opts = {
.name = "qcow",
.head = QTAILQ_HEAD_INITIALIZER(qcow_runtime_opts.head),
.desc = {
BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
{ /* end of list */ }
},
};
static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -105,11 +116,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
QCowHeader header;
Error *local_err = NULL;
QCryptoBlockOpenOptions *crypto_opts = NULL;
unsigned int cflags = 0;
QDict *encryptopts = NULL;
const char *encryptfmt;
qdict_extract_subqdict(options, &encryptopts, "encrypt.");
encryptfmt = qdict_get_try_str(encryptopts, "format");
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
ret = -EINVAL;
goto fail;
}
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
@@ -155,17 +174,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.crypt_method > QCOW_CRYPT_AES) {
error_setg(errp, "invalid encryption method in qcow header");
ret = -EINVAL;
goto fail;
}
if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128,
QCRYPTO_CIPHER_MODE_CBC)) {
error_setg(errp, "AES cipher not available");
ret = -EINVAL;
goto fail;
}
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
if (bdrv_uses_whitelist() &&
@@ -181,8 +189,44 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOSYS;
goto fail;
}
if (s->crypt_method_header == QCOW_CRYPT_AES) {
if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
error_setg(errp,
"Header reported 'aes' encryption format but "
"options specify '%s'", encryptfmt);
ret = -EINVAL;
goto fail;
}
qdict_del(encryptopts, "format");
crypto_opts = block_crypto_open_opts_init(
Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
if (!crypto_opts) {
ret = -EINVAL;
goto fail;
}
if (flags & BDRV_O_NO_IO) {
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
}
s->crypto = qcrypto_block_open(crypto_opts, "encrypt.",
NULL, NULL, cflags, errp);
if (!s->crypto) {
ret = -EINVAL;
goto fail;
}
} else {
error_setg(errp, "invalid encryption method in qcow header");
ret = -EINVAL;
goto fail;
}
bs->encrypted = true;
} else {
if (encryptfmt) {
error_setg(errp, "No encryption in image header, but options "
"specified format '%s'", encryptfmt);
ret = -EINVAL;
goto fail;
}
}
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
@@ -266,6 +310,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
QDECREF(encryptopts);
qapi_free_QCryptoBlockOpenOptions(crypto_opts);
qemu_co_mutex_init(&s->lock);
return 0;
@@ -274,6 +320,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
g_free(s->cluster_data);
qcrypto_block_free(s->crypto);
QDECREF(encryptopts);
qapi_free_QCryptoBlockOpenOptions(crypto_opts);
return ret;
}
@@ -286,85 +335,6 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
return 0;
}
static int qcow_set_key(BlockDriverState *bs, const char *key)
{
BDRVQcowState *s = bs->opaque;
uint8_t keybuf[16];
int len, i;
Error *err;
memset(keybuf, 0, 16);
len = strlen(key);
if (len > 16)
len = 16;
/* XXX: we could compress the chars to 7 bits to increase
entropy */
for(i = 0;i < len;i++) {
keybuf[i] = key[i];
}
assert(bs->encrypted);
qcrypto_cipher_free(s->cipher);
s->cipher = qcrypto_cipher_new(
QCRYPTO_CIPHER_ALG_AES_128,
QCRYPTO_CIPHER_MODE_CBC,
keybuf, G_N_ELEMENTS(keybuf),
&err);
if (!s->cipher) {
/* XXX would be nice if errors in this method could
* be properly propagate to the caller. Would need
* the bdrv_set_key() API signature to be fixed. */
error_free(err);
return -1;
}
return 0;
}
/* The crypt function is compatible with the linux cryptoloop
algorithm for < 4 GB images. NOTE: out_buf == in_buf is
supported */
static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, bool enc, Error **errp)
{
union {
uint64_t ll[2];
uint8_t b[16];
} ivec;
int i;
int ret;
for(i = 0; i < nb_sectors; i++) {
ivec.ll[0] = cpu_to_le64(sector_num);
ivec.ll[1] = 0;
if (qcrypto_cipher_setiv(s->cipher,
ivec.b, G_N_ELEMENTS(ivec.b),
errp) < 0) {
return -1;
}
if (enc) {
ret = qcrypto_cipher_encrypt(s->cipher,
in_buf,
out_buf,
512,
errp);
} else {
ret = qcrypto_cipher_decrypt(s->cipher,
in_buf,
out_buf,
512,
errp);
}
if (ret < 0) {
return -1;
}
sector_num++;
in_buf += 512;
out_buf += 512;
}
return 0;
}
/* 'allocate' is:
*
@@ -473,22 +443,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
bdrv_truncate(bs->file, cluster_offset + s->cluster_size, NULL);
bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
PREALLOC_MODE_OFF, NULL);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (bs->encrypted &&
(n_end - n_start) < s->cluster_sectors) {
uint64_t start_sect;
assert(s->cipher);
assert(s->crypto);
start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
memset(s->cluster_data + 512, 0x00, 512);
for(i = 0; i < s->cluster_sectors; i++) {
if (i < n_start || i >= n_end) {
Error *err = NULL;
if (encrypt_sectors(s, start_sect + i,
s->cluster_data,
s->cluster_data + 512, 1,
true, &err) < 0) {
memset(s->cluster_data, 0x00, 512);
if (qcrypto_block_encrypt(s->crypto, start_sect + i,
s->cluster_data,
BDRV_SECTOR_SIZE,
&err) < 0) {
error_free(err);
errno = EIO;
return -1;
@@ -533,7 +504,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
if (!cluster_offset) {
return 0;
}
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
return BDRV_BLOCK_DATA;
}
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
@@ -664,9 +635,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
break;
}
if (bs->encrypted) {
assert(s->cipher);
if (encrypt_sectors(s, sector_num, buf, buf,
n, false, &err) < 0) {
assert(s->crypto);
if (qcrypto_block_decrypt(s->crypto, sector_num, buf,
n * BDRV_SECTOR_SIZE, &err) < 0) {
goto fail;
}
}
@@ -700,9 +671,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
uint64_t cluster_offset;
const uint8_t *src_buf;
int ret = 0, n;
uint8_t *cluster_data = NULL;
struct iovec hd_iov;
QEMUIOVector hd_qiov;
uint8_t *buf;
@@ -710,7 +679,9 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
s->cluster_cache_offset = -1; /* disable compressed cache */
if (qiov->niov > 1) {
/* We must always copy the iov when encrypting, so we
* don't modify the original data buffer during encryption */
if (bs->encrypted || qiov->niov > 1) {
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
if (buf == NULL) {
return -ENOMEM;
@@ -739,22 +710,16 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
}
if (bs->encrypted) {
Error *err = NULL;
assert(s->cipher);
if (!cluster_data) {
cluster_data = g_malloc0(s->cluster_size);
}
if (encrypt_sectors(s, sector_num, cluster_data, buf,
n, true, &err) < 0) {
assert(s->crypto);
if (qcrypto_block_encrypt(s->crypto, sector_num, buf,
n * BDRV_SECTOR_SIZE, &err) < 0) {
error_free(err);
ret = -EIO;
break;
}
src_buf = cluster_data;
} else {
src_buf = buf;
}
hd_iov.iov_base = (void *)src_buf;
hd_iov.iov_base = (void *)buf;
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
@@ -773,10 +738,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
}
qemu_co_mutex_unlock(&s->lock);
if (qiov->niov > 1) {
qemu_vfree(orig_buf);
}
g_free(cluster_data);
qemu_vfree(orig_buf);
return ret;
}
@@ -785,8 +747,8 @@ static void qcow_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
qcrypto_cipher_free(s->cipher);
s->cipher = NULL;
qcrypto_block_free(s->crypto);
s->crypto = NULL;
g_free(s->l1_table);
qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
@@ -803,17 +765,35 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
uint8_t *tmp;
int64_t total_size = 0;
char *backing_file = NULL;
int flags = 0;
Error *local_err = NULL;
int ret;
BlockBackend *qcow_blk;
const char *encryptfmt = NULL;
QDict *options;
QDict *encryptopts = NULL;
QCryptoBlockCreateOptions *crypto_opts = NULL;
QCryptoBlock *crypto = NULL;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
if (total_size == 0) {
error_setg(errp, "Image size is too small, cannot be zero length");
ret = -EINVAL;
goto cleanup;
}
backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
flags |= BLOCK_FLAG_ENCRYPT;
encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
if (encryptfmt) {
if (qemu_opt_get(opts, BLOCK_OPT_ENCRYPT)) {
error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and "
BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive");
ret = -EINVAL;
goto cleanup;
}
} else if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
encryptfmt = "aes";
}
ret = bdrv_create_file(filename, opts, &local_err);
@@ -833,7 +813,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
blk_set_allow_write_beyond_eof(qcow_blk, true);
ret = blk_truncate(qcow_blk, 0, errp);
ret = blk_truncate(qcow_blk, 0, PREALLOC_MODE_OFF, errp);
if (ret < 0) {
goto exit;
}
@@ -852,6 +832,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
header_size += backing_filename_len;
} else {
/* special backing file for vvfat */
g_free(backing_file);
backing_file = NULL;
}
header.cluster_bits = 9; /* 512 byte cluster to avoid copying
@@ -866,8 +847,32 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
l1_size = (total_size + (1LL << shift) - 1) >> shift;
header.l1_table_offset = cpu_to_be64(header_size);
if (flags & BLOCK_FLAG_ENCRYPT) {
options = qemu_opts_to_qdict(opts, NULL);
qdict_extract_subqdict(options, &encryptopts, "encrypt.");
QDECREF(options);
if (encryptfmt) {
if (!g_str_equal(encryptfmt, "aes")) {
error_setg(errp, "Unknown encryption format '%s', expected 'aes'",
encryptfmt);
ret = -EINVAL;
goto exit;
}
header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
crypto_opts = block_crypto_create_opts_init(
Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
if (!crypto_opts) {
ret = -EINVAL;
goto exit;
}
crypto = qcrypto_block_create(crypto_opts, "encrypt.",
NULL, NULL, NULL, errp);
if (!crypto) {
ret = -EINVAL;
goto exit;
}
} else {
header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
}
@@ -902,6 +907,9 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
exit:
blk_unref(qcow_blk);
cleanup:
QDECREF(encryptopts);
qcrypto_block_free(crypto);
qapi_free_QCryptoBlockCreateOptions(crypto_opts);
g_free(backing_file);
return ret;
}
@@ -916,7 +924,8 @@ static int qcow_make_empty(BlockDriverState *bs)
if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, NULL);
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length,
PREALLOC_MODE_OFF, NULL);
if (ret < 0)
return ret;
@@ -1040,9 +1049,15 @@ static QemuOptsList qcow_create_opts = {
{
.name = BLOCK_OPT_ENCRYPT,
.type = QEMU_OPT_BOOL,
.help = "Encrypt the image",
.def_value_str = "off"
.help = "Encrypt the image with format 'aes'. (Deprecated "
"in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
},
{
.name = BLOCK_OPT_ENCRYPT_FORMAT,
.type = QEMU_OPT_STRING,
.help = "Encrypt the image, format choices: 'aes'",
},
BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
{ /* end of list */ }
}
};
@@ -1063,7 +1078,6 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_get_block_status = qcow_co_get_block_status,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
.bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
.bdrv_get_info = qcow_get_info,

1482
block/qcow2-bitmap.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -357,76 +357,21 @@ static int count_contiguous_clusters_unallocated(int nb_clusters,
return i;
}
/* The crypt function is compatible with the linux cryptoloop
algorithm for < 4 GB images. NOTE: out_buf == in_buf is
supported */
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, bool enc,
Error **errp)
static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
uint64_t src_cluster_offset,
unsigned offset_in_cluster,
QEMUIOVector *qiov)
{
union {
uint64_t ll[2];
uint8_t b[16];
} ivec;
int i;
int ret;
for(i = 0; i < nb_sectors; i++) {
ivec.ll[0] = cpu_to_le64(sector_num);
ivec.ll[1] = 0;
if (qcrypto_cipher_setiv(s->cipher,
ivec.b, G_N_ELEMENTS(ivec.b),
errp) < 0) {
return -1;
}
if (enc) {
ret = qcrypto_cipher_encrypt(s->cipher,
in_buf,
out_buf,
512,
errp);
} else {
ret = qcrypto_cipher_decrypt(s->cipher,
in_buf,
out_buf,
512,
errp);
}
if (ret < 0) {
return -1;
}
sector_num++;
in_buf += 512;
out_buf += 512;
if (qiov->size == 0) {
return 0;
}
return 0;
}
static int coroutine_fn do_perform_cow(BlockDriverState *bs,
uint64_t src_cluster_offset,
uint64_t cluster_offset,
int offset_in_cluster,
int bytes)
{
BDRVQcow2State *s = bs->opaque;
QEMUIOVector qiov;
struct iovec iov;
int ret;
iov.iov_len = bytes;
iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
if (iov.iov_base == NULL) {
return -ENOMEM;
}
qemu_iovec_init_external(&qiov, &iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
if (!bs->drv) {
ret = -ENOMEDIUM;
goto out;
return -ENOMEDIUM;
}
/* Call .bdrv_co_readv() directly instead of using the public block-layer
@@ -434,43 +379,63 @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs,
* which can lead to deadlock when block layer copy-on-read is enabled.
*/
ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
bytes, &qiov, 0);
qiov->size, qiov, 0);
if (ret < 0) {
goto out;
return ret;
}
if (bs->encrypted) {
Error *err = NULL;
int64_t sector = (src_cluster_offset + offset_in_cluster)
return 0;
}
static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
uint64_t src_cluster_offset,
uint64_t cluster_offset,
unsigned offset_in_cluster,
uint8_t *buffer,
unsigned bytes)
{
if (bytes && bs->encrypted) {
BDRVQcow2State *s = bs->opaque;
int64_t sector = (s->crypt_physical_offset ?
(cluster_offset + offset_in_cluster) :
(src_cluster_offset + offset_in_cluster))
>> BDRV_SECTOR_BITS;
assert(s->cipher);
assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
assert((bytes & ~BDRV_SECTOR_MASK) == 0);
if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
ret = -EIO;
error_free(err);
goto out;
assert(s->crypto);
if (qcrypto_block_encrypt(s->crypto, sector, buffer,
bytes, NULL) < 0) {
return false;
}
}
return true;
}
static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
uint64_t cluster_offset,
unsigned offset_in_cluster,
QEMUIOVector *qiov)
{
int ret;
if (qiov->size == 0) {
return 0;
}
ret = qcow2_pre_write_overlap_check(bs, 0,
cluster_offset + offset_in_cluster, bytes);
cluster_offset + offset_in_cluster, qiov->size);
if (ret < 0) {
goto out;
return ret;
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
bytes, &qiov, 0);
qiov->size, qiov, 0);
if (ret < 0) {
goto out;
return ret;
}
ret = 0;
out:
qemu_vfree(iov.iov_base);
return ret;
return 0;
}
@@ -548,7 +513,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
/* find the cluster offset for the given disk offset */
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
l2_index = offset_to_l2_index(s, offset);
*cluster_offset = be64_to_cpu(l2_table[l2_index]);
nb_clusters = size_to_clusters(s, bytes_needed);
@@ -685,7 +650,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
/* find the cluster offset for the given disk offset */
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
l2_index = offset_to_l2_index(s, offset);
*new_l2_table = l2_table;
*new_l2_index = l2_index;
@@ -753,31 +718,134 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcow2State *s = bs->opaque;
Qcow2COWRegion *start = &m->cow_start;
Qcow2COWRegion *end = &m->cow_end;
unsigned buffer_size;
unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
bool merge_reads;
uint8_t *start_buffer, *end_buffer;
QEMUIOVector qiov;
int ret;
if (r->nb_bytes == 0) {
assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
assert(start->offset + start->nb_bytes <= end->offset);
assert(!m->data_qiov || m->data_qiov->size == data_bytes);
if (start->nb_bytes == 0 && end->nb_bytes == 0) {
return 0;
}
qemu_co_mutex_unlock(&s->lock);
ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
return ret;
/* If we have to read both the start and end COW regions and the
* middle region is not too large then perform just one read
* operation */
merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384;
if (merge_reads) {
buffer_size = start->nb_bytes + data_bytes + end->nb_bytes;
} else {
/* If we have to do two reads, add some padding in the middle
* if necessary to make sure that the end region is optimally
* aligned. */
size_t align = bdrv_opt_mem_align(bs);
assert(align > 0 && align <= UINT_MAX);
assert(QEMU_ALIGN_UP(start->nb_bytes, align) <=
UINT_MAX - end->nb_bytes);
buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes;
}
/* Reserve a buffer large enough to store all the data that we're
* going to read */
start_buffer = qemu_try_blockalign(bs, buffer_size);
if (start_buffer == NULL) {
return -ENOMEM;
}
/* The part of the buffer where the end region is located */
end_buffer = start_buffer + buffer_size - end->nb_bytes;
qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0));
qemu_co_mutex_unlock(&s->lock);
/* First we read the existing data from both COW regions. We
* either read the whole region in one go, or the start and end
* regions separately. */
if (merge_reads) {
qemu_iovec_add(&qiov, start_buffer, buffer_size);
ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
} else {
qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
if (ret < 0) {
goto fail;
}
qemu_iovec_reset(&qiov);
qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
}
if (ret < 0) {
goto fail;
}
/* Encrypt the data if necessary before writing it */
if (bs->encrypted) {
if (!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
start->offset, start_buffer,
start->nb_bytes) ||
!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
end->offset, end_buffer, end->nb_bytes)) {
ret = -EIO;
goto fail;
}
}
/* And now we can write everything. If we have the guest data we
* can write everything in one single operation */
if (m->data_qiov) {
qemu_iovec_reset(&qiov);
if (start->nb_bytes) {
qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
}
qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes);
if (end->nb_bytes) {
qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
}
/* NOTE: we have a write_aio blkdebug event here followed by
* a cow_write one in do_perform_cow_write(), but there's only
* one single I/O operation */
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
} else {
/* If there's no guest data then write both COW regions separately */
qemu_iovec_reset(&qiov);
qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
if (ret < 0) {
goto fail;
}
qemu_iovec_reset(&qiov);
qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
}
fail:
qemu_co_mutex_lock(&s->lock);
/*
* Before we update the L2 table to actually point to the new cluster, we
* need to be sure that the refcounts have been increased and COW was
* handled.
*/
qcow2_cache_depends_on_flush(s->l2_table_cache);
if (ret == 0) {
qcow2_cache_depends_on_flush(s->l2_table_cache);
}
return 0;
qemu_vfree(start_buffer);
qemu_iovec_destroy(&qiov);
return ret;
}
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
@@ -797,12 +865,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
}
/* copy content of unmodified sectors */
ret = perform_cow(bs, m, &m->cow_start);
if (ret < 0) {
goto err;
}
ret = perform_cow(bs, m, &m->cow_end);
ret = perform_cow(bs, m);
if (ret < 0) {
goto err;
}
@@ -1797,7 +1860,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
}
if (offset_into_cluster(s, offset)) {
qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
qcow2_signal_corruption(bs, true, -1, -1,
"Cluster allocation offset "
"%#" PRIx64 " unaligned (L2 offset: %#"
PRIx64 ", L2 index: %#x)", offset,
l2_offset, j);

View File

@@ -281,25 +281,6 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
return 0;
}
/*
* Rounds the refcount table size up to avoid growing the table for each single
* refcount block that is allocated.
*/
static unsigned int next_refcount_table_size(BDRVQcow2State *s,
unsigned int min_size)
{
unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
unsigned int refcount_table_clusters =
MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
while (min_clusters > refcount_table_clusters) {
refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
}
return refcount_table_clusters << (s->cluster_bits - 3);
}
/* Checks if two offsets are described by the same refcount block */
static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
uint64_t offset_b)
@@ -321,7 +302,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
{
BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_index;
int ret;
int64_t ret;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
@@ -396,7 +377,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
refcount_block);
if (ret < 0) {
goto fail_block;
goto fail;
}
memset(*refcount_block, 0, s->cluster_size);
@@ -411,12 +392,12 @@ static int alloc_refcount_block(BlockDriverState *bs,
ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
QCOW2_DISCARD_NEVER);
if (ret < 0) {
goto fail_block;
goto fail;
}
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail_block;
goto fail;
}
/* Initialize the new refcount block only after updating its refcount,
@@ -424,7 +405,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
refcount_block);
if (ret < 0) {
goto fail_block;
goto fail;
}
memset(*refcount_block, 0, s->cluster_size);
@@ -435,7 +416,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail_block;
goto fail;
}
/* If the refcount table is big enough, just hook the block up there */
@@ -446,7 +427,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
&data64, sizeof(data64));
if (ret < 0) {
goto fail_block;
goto fail;
}
s->refcount_table[refcount_table_index] = new_block;
@@ -490,74 +471,201 @@ static int alloc_refcount_block(BlockDriverState *bs,
(new_block >> s->cluster_bits) + 1),
s->refcount_block_size);
if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
return -EFBIG;
}
/* And now we need at least one block more for the new metadata */
uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
uint64_t last_table_size;
uint64_t blocks_clusters;
do {
uint64_t table_clusters =
size_to_clusters(s, table_size * sizeof(uint64_t));
blocks_clusters = 1 +
DIV_ROUND_UP(table_clusters, s->refcount_block_size);
uint64_t meta_clusters = table_clusters + blocks_clusters;
last_table_size = table_size;
table_size = next_refcount_table_size(s, blocks_used +
DIV_ROUND_UP(meta_clusters, s->refcount_block_size));
} while (last_table_size != table_size);
#ifdef DEBUG_ALLOC2
fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
s->refcount_table_size, table_size);
#endif
/* Create the new refcount table and blocks */
uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
s->cluster_size;
uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
uint64_t *new_table = g_try_new0(uint64_t, table_size);
void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
assert(table_size > 0 && blocks_clusters > 0);
if (new_table == NULL || new_blocks == NULL) {
ret = qcow2_refcount_area(bs, meta_offset, 0, false,
refcount_table_index, new_block);
if (ret < 0) {
return ret;
}
ret = load_refcount_block(bs, new_block, refcount_block);
if (ret < 0) {
return ret;
}
/* If we were trying to do the initial refcount update for some cluster
* allocation, we might have used the same clusters to store newly
* allocated metadata. Make the caller search some new space. */
return -EAGAIN;
fail:
if (*refcount_block != NULL) {
qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
}
return ret;
}
/*
* Starting at @start_offset, this function creates new self-covering refcount
* structures: A new refcount table and refcount blocks which cover all of
* themselves, and a number of @additional_clusters beyond their end.
* @start_offset must be at the end of the image file, that is, there must be
* only empty space beyond it.
* If @exact_size is false, the refcount table will have 50 % more entries than
* necessary so it will not need to grow again soon.
* If @new_refblock_offset is not zero, it contains the offset of a refcount
* block that should be entered into the new refcount table at index
* @new_refblock_index.
*
* Returns: The offset after the new refcount structures (i.e. where the
* @additional_clusters may be placed) on success, -errno on error.
*/
int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
uint64_t additional_clusters, bool exact_size,
int new_refblock_index,
uint64_t new_refblock_offset)
{
BDRVQcow2State *s = bs->opaque;
uint64_t total_refblock_count_u64, additional_refblock_count;
int total_refblock_count, table_size, area_reftable_index, table_clusters;
int i;
uint64_t table_offset, block_offset, end_offset;
int ret;
uint64_t *new_table;
assert(!(start_offset % s->cluster_size));
qcow2_refcount_metadata_size(start_offset / s->cluster_size +
additional_clusters,
s->cluster_size, s->refcount_order,
!exact_size, &total_refblock_count_u64);
if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
return -EFBIG;
}
total_refblock_count = total_refblock_count_u64;
/* Index in the refcount table of the first refcount block to cover the area
* of refcount structures we are about to create; we know that
* @total_refblock_count can cover @start_offset, so this will definitely
* fit into an int. */
area_reftable_index = (start_offset / s->cluster_size) /
s->refcount_block_size;
if (exact_size) {
table_size = total_refblock_count;
} else {
table_size = total_refblock_count +
DIV_ROUND_UP(total_refblock_count, 2);
}
/* The qcow2 file can only store the reftable size in number of clusters */
table_size = ROUND_UP(table_size, s->cluster_size / sizeof(uint64_t));
table_clusters = (table_size * sizeof(uint64_t)) / s->cluster_size;
if (table_size > QCOW_MAX_REFTABLE_SIZE) {
return -EFBIG;
}
new_table = g_try_new0(uint64_t, table_size);
assert(table_size > 0);
if (new_table == NULL) {
ret = -ENOMEM;
goto fail_table;
goto fail;
}
/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
s->refcount_table_size * sizeof(uint64_t));
new_table[refcount_table_index] = new_block;
int i;
for (i = 0; i < blocks_clusters; i++) {
new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
if (table_size > s->max_refcount_table_index) {
/* We're actually growing the reftable */
memcpy(new_table, s->refcount_table,
(s->max_refcount_table_index + 1) * sizeof(uint64_t));
} else {
/* Improbable case: We're shrinking the reftable. However, the caller
* has assured us that there is only empty space beyond @start_offset,
* so we can simply drop all of the refblocks that won't fit into the
* new reftable. */
memcpy(new_table, s->refcount_table, table_size * sizeof(uint64_t));
}
/* Fill the refcount blocks */
uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
int block = 0;
for (i = 0; i < table_clusters + blocks_clusters; i++) {
s->set_refcount(new_blocks, block++, 1);
if (new_refblock_offset) {
assert(new_refblock_index < total_refblock_count);
new_table[new_refblock_index] = new_refblock_offset;
}
/* Count how many new refblocks we have to create */
additional_refblock_count = 0;
for (i = area_reftable_index; i < total_refblock_count; i++) {
if (!new_table[i]) {
additional_refblock_count++;
}
}
table_offset = start_offset + additional_refblock_count * s->cluster_size;
end_offset = table_offset + table_clusters * s->cluster_size;
/* Fill the refcount blocks, and create new ones, if necessary */
block_offset = start_offset;
for (i = area_reftable_index; i < total_refblock_count; i++) {
void *refblock_data;
uint64_t first_offset_covered;
/* Reuse an existing refblock if possible, create a new one otherwise */
if (new_table[i]) {
ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
&refblock_data);
if (ret < 0) {
goto fail;
}
} else {
ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
block_offset, &refblock_data);
if (ret < 0) {
goto fail;
}
memset(refblock_data, 0, s->cluster_size);
qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
refblock_data);
new_table[i] = block_offset;
block_offset += s->cluster_size;
}
/* First host offset covered by this refblock */
first_offset_covered = (uint64_t)i * s->refcount_block_size *
s->cluster_size;
if (first_offset_covered < end_offset) {
int j, end_index;
/* Set the refcount of all of the new refcount structures to 1 */
if (first_offset_covered < start_offset) {
assert(i == area_reftable_index);
j = (start_offset - first_offset_covered) / s->cluster_size;
assert(j < s->refcount_block_size);
} else {
j = 0;
}
end_index = MIN((end_offset - first_offset_covered) /
s->cluster_size,
s->refcount_block_size);
for (; j < end_index; j++) {
/* The caller guaranteed us this space would be empty */
assert(s->get_refcount(refblock_data, j) == 0);
s->set_refcount(refblock_data, j, 1);
}
qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
refblock_data);
}
qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data);
}
assert(block_offset == table_offset);
/* Write refcount blocks to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
blocks_clusters * s->cluster_size);
g_free(new_blocks);
new_blocks = NULL;
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail_table;
goto fail;
}
/* Write refcount table to disk */
for(i = 0; i < table_size; i++) {
for (i = 0; i < total_refblock_count; i++) {
cpu_to_be64s(&new_table[i]);
}
@@ -565,10 +673,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
table_size * sizeof(uint64_t));
if (ret < 0) {
goto fail_table;
goto fail;
}
for(i = 0; i < table_size; i++) {
for (i = 0; i < total_refblock_count; i++) {
be64_to_cpus(&new_table[i]);
}
@@ -584,7 +692,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
offsetof(QCowHeader, refcount_table_offset),
&data, sizeof(data));
if (ret < 0) {
goto fail_table;
goto fail;
}
/* And switch it in memory */
@@ -601,23 +709,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
ret = load_refcount_block(bs, new_block, refcount_block);
if (ret < 0) {
return ret;
}
return end_offset;
/* If we were trying to do the initial refcount update for some cluster
* allocation, we might have used the same clusters to store newly
* allocated metadata. Make the caller search some new space. */
return -EAGAIN;
fail_table:
g_free(new_blocks);
fail:
g_free(new_table);
fail_block:
if (*refcount_block != NULL) {
qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
}
return ret;
}
@@ -1323,11 +1418,10 @@ static int realloc_refcount_array(BDRVQcow2State *s, void **array,
*
* Modifies the number of errors in res.
*/
static int inc_refcounts(BlockDriverState *bs,
BdrvCheckResult *res,
void **refcount_table,
int64_t *refcount_table_size,
int64_t offset, int64_t size)
int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
void **refcount_table,
int64_t *refcount_table_size,
int64_t offset, int64_t size)
{
BDRVQcow2State *s = bs->opaque;
uint64_t start, last, cluster_offset, k, refcount;
@@ -1420,8 +1514,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
nb_csectors = ((l2_entry >> s->csize_shift) &
s->csize_mask) + 1;
l2_entry &= s->cluster_offset_mask;
ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
l2_entry & ~511, nb_csectors * 512);
ret = qcow2_inc_refcounts_imrt(bs, res,
refcount_table, refcount_table_size,
l2_entry & ~511, nb_csectors * 512);
if (ret < 0) {
goto fail;
}
@@ -1454,8 +1549,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
}
/* Mark cluster as used */
ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
offset, s->cluster_size);
ret = qcow2_inc_refcounts_imrt(bs, res,
refcount_table, refcount_table_size,
offset, s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -1508,8 +1604,8 @@ static int check_refcounts_l1(BlockDriverState *bs,
l1_size2 = l1_size * sizeof(uint64_t);
/* Mark L1 table as used */
ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
l1_table_offset, l1_size2);
ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
l1_table_offset, l1_size2);
if (ret < 0) {
goto fail;
}
@@ -1538,8 +1634,9 @@ static int check_refcounts_l1(BlockDriverState *bs,
if (l2_offset) {
/* Mark L2 table as used */
l2_offset &= L1E_OFFSET_MASK;
ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
l2_offset, s->cluster_size);
ret = qcow2_inc_refcounts_imrt(bs, res,
refcount_table, refcount_table_size,
l2_offset, s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -1730,7 +1827,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
}
ret = bdrv_truncate(bs->file, offset + s->cluster_size,
&local_err);
PREALLOC_MODE_OFF, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto resize_fail;
@@ -1757,14 +1854,15 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
}
res->corruptions_fixed++;
ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
offset, s->cluster_size);
ret = qcow2_inc_refcounts_imrt(bs, res,
refcount_table, nb_clusters,
offset, s->cluster_size);
if (ret < 0) {
return ret;
}
/* No need to check whether the refcount is now greater than 1:
* This area was just allocated and zeroed, so it can only be
* exactly 1 after inc_refcounts() */
* exactly 1 after qcow2_inc_refcounts_imrt() */
continue;
resize_fail:
@@ -1779,8 +1877,8 @@ resize_fail:
}
if (offset != 0) {
ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
offset, s->cluster_size);
ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
offset, s->cluster_size);
if (ret < 0) {
return ret;
}
@@ -1820,8 +1918,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
}
/* header */
ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
0, s->cluster_size);
ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
0, s->cluster_size);
if (ret < 0) {
return ret;
}
@@ -1842,16 +1940,32 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
return ret;
}
}
ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
s->snapshots_offset, s->snapshots_size);
ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
s->snapshots_offset, s->snapshots_size);
if (ret < 0) {
return ret;
}
/* refcount data */
ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
s->refcount_table_offset,
s->refcount_table_size * sizeof(uint64_t));
ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
s->refcount_table_offset,
s->refcount_table_size * sizeof(uint64_t));
if (ret < 0) {
return ret;
}
/* encryption */
if (s->crypto_header.length) {
ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
s->crypto_header.offset,
s->crypto_header.length);
if (ret < 0) {
return ret;
}
}
/* bitmaps */
ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
if (ret < 0) {
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@@ -25,7 +25,7 @@
#ifndef BLOCK_QCOW2_H
#define BLOCK_QCOW2_H
#include "crypto/cipher.h"
#include "crypto/block.h"
#include "qemu/coroutine.h"
//#define DEBUG_ALLOC
@@ -36,6 +36,7 @@
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES 1
#define QCOW_CRYPT_LUKS 2
#define QCOW_MAX_CRYPT_CLUSTERS 32
#define QCOW_MAX_SNAPSHOTS 65536
@@ -52,6 +53,10 @@
* space for snapshot names and IDs */
#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
/* Bitmap header extension constraints */
#define QCOW2_MAX_BITMAPS 65535
#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)
/* indicate that the refcount of the referenced cluster is exactly one. */
#define QCOW_OFLAG_COPIED (1ULL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
@@ -163,6 +168,11 @@ typedef struct QCowSnapshot {
struct Qcow2Cache;
typedef struct Qcow2Cache Qcow2Cache;
/*
 * Position of the encryption header inside the image file.
 *
 * BDRVQcow2State keeps one of these in its crypto_header member; the
 * refcount check only accounts for the area when @length is non-zero.
 */
typedef struct Qcow2CryptoHeaderExtension {
/* Start of the encryption header (presumably a byte offset - confirm) */
uint64_t offset;
/* Size of the encryption header; 0 means there is none to account for */
uint64_t length;
} QEMU_PACKED Qcow2CryptoHeaderExtension;
typedef struct Qcow2UnknownHeaderExtension {
uint32_t magic;
uint32_t len;
@@ -195,6 +205,14 @@ enum {
QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
};
/*
 * Autoclear feature bits.
 *
 * Each feature has a *_BITNR constant (the bit index) and a matching
 * constant holding the corresponding single-bit mask.
 * QCOW2_AUTOCLEAR_MASK is the union of every autoclear bit defined here.
 */
enum {
QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0,
QCOW2_AUTOCLEAR_BITMAPS = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR,
QCOW2_AUTOCLEAR_MASK = QCOW2_AUTOCLEAR_BITMAPS,
};
enum qcow2_discard_type {
QCOW2_DISCARD_NEVER = 0,
QCOW2_DISCARD_ALWAYS,
@@ -222,6 +240,13 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
typedef void Qcow2SetRefcountFunc(void *refcount_array,
uint64_t index, uint64_t value);
/*
 * Payload of the bitmaps header extension.
 *
 * BDRVQcow2State carries fields with the same names (nb_bitmaps,
 * bitmap_directory_size, bitmap_directory_offset).
 */
typedef struct Qcow2BitmapHeaderExt {
/* Number of bitmaps (see QCOW2_MAX_BITMAPS for the upper bound) */
uint32_t nb_bitmaps;
/* Reserved field; NOTE(review): presumably must be zero - confirm */
uint32_t reserved32;
/* Size of the bitmap directory (see QCOW2_MAX_BITMAP_DIRECTORY_SIZE) */
uint64_t bitmap_directory_size;
/* Location of the bitmap directory in the image file */
uint64_t bitmap_directory_offset;
} QEMU_PACKED Qcow2BitmapHeaderExt;
typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
@@ -257,13 +282,21 @@ typedef struct BDRVQcow2State {
CoMutex lock;
QCryptoCipher *cipher; /* current cipher, NULL if no key yet */
Qcow2CryptoHeaderExtension crypto_header; /* QCow2 header extension */
QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
QCryptoBlock *crypto; /* Disk encryption format driver */
bool crypt_physical_offset; /* Whether to use virtual or physical offset
for encryption initialization vector tweak */
uint32_t crypt_method_header;
uint64_t snapshots_offset;
int snapshots_size;
unsigned int nb_snapshots;
QCowSnapshot *snapshots;
uint32_t nb_bitmaps;
uint64_t bitmap_directory_size;
uint64_t bitmap_directory_offset;
int flags;
int qcow_version;
bool use_lazy_refcounts;
@@ -301,10 +334,10 @@ typedef struct Qcow2COWRegion {
* Offset of the COW region in bytes from the start of the first cluster
* touched by the request.
*/
uint64_t offset;
unsigned offset;
/** Number of bytes to copy */
int nb_bytes;
unsigned nb_bytes;
} Qcow2COWRegion;
/**
@@ -343,6 +376,13 @@ typedef struct QCowL2Meta
*/
Qcow2COWRegion cow_end;
/**
* The I/O vector with the data from the actual guest write request.
* If non-NULL, this is meant to be merged together with the data
* from @cow_start and @cow_end into one single write operation.
*/
QEMUIOVector *data_qiov;
/** Pointer to next L2Meta of the same write request */
struct QCowL2Meta *next;
@@ -485,6 +525,10 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
int refcount_order, bool generous_increase,
uint64_t *refblock_count);
int qcow2_mark_dirty(BlockDriverState *bs);
int qcow2_mark_corrupt(BlockDriverState *bs);
int qcow2_mark_consistent(BlockDriverState *bs);
@@ -505,6 +549,11 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
uint64_t addend, bool decrease,
enum qcow2_discard_type type);
int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
uint64_t additional_clusters, bool exact_size,
int new_refblock_index,
uint64_t new_refblock_offset);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int64_t nb_clusters);
@@ -527,6 +576,10 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
void **refcount_table,
int64_t *refcount_table_size,
int64_t offset, int64_t size);
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
BlockDriverAmendStatusCB *status_cb,
@@ -538,8 +591,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, bool enc, Error **errp);
uint8_t *buf, int nb_sectors, bool enc, Error **errp);
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
unsigned int *bytes, uint64_t *cluster_offset);
@@ -598,4 +650,20 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
/* qcow2-bitmap.c functions */
int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
void **refcount_table,
int64_t *refcount_table_size);
bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
const char *name,
uint32_t granularity,
Error **errp);
void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
const char *name,
Error **errp);
#endif

View File

@@ -61,37 +61,65 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
return i - index;
}
typedef struct {
BDRVQEDState *s;
uint64_t pos;
size_t len;
QEDRequest *request;
/* User callback */
QEDFindClusterFunc *cb;
void *opaque;
} QEDFindClusterCB;
static void qed_find_cluster_cb(void *opaque, int ret)
/**
* Find the offset of a data cluster
*
* @s: QED state
* @request: L2 cache entry
* @pos: Byte position in device
* @len: Number of bytes (may be shortened on return)
* @img_offset: Contains offset in the image file on success
*
* This function translates a position in the block device to an offset in the
* image file. The translated offset or unallocated range in the image file is
* reported back in *img_offset and *len.
*
* If the L2 table exists, request->l2_table points to the L2 table cache entry
* and the caller must free the reference when they are finished. The cache
* entry is exposed in this way to avoid callers having to read the L2 table
* again later during request processing. If request->l2_table is non-NULL it
* will be unreferenced before taking on the new cache entry.
*
* On success QED_CLUSTER_FOUND is returned and img_offset/len are a contiguous
* range in the image file.
*
* On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
* table offset, respectively. len is number of contiguous unallocated bytes.
*
* Called with table_lock held.
*/
int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
uint64_t pos, size_t *len,
uint64_t *img_offset)
{
QEDFindClusterCB *find_cluster_cb = opaque;
BDRVQEDState *s = find_cluster_cb->s;
QEDRequest *request = find_cluster_cb->request;
uint64_t l2_offset;
uint64_t offset = 0;
size_t len = 0;
unsigned int index;
unsigned int n;
int ret;
qed_acquire(s);
/* Limit length to L2 boundary. Requests are broken up at the L2 boundary
* so that a request acts on one L2 table at a time.
*/
*len = MIN(*len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
if (qed_offset_is_unalloc_cluster(l2_offset)) {
*img_offset = 0;
return QED_CLUSTER_L1;
}
if (!qed_check_table_offset(s, l2_offset)) {
*img_offset = *len = 0;
return -EINVAL;
}
ret = qed_read_l2_table(s, request, l2_offset);
if (ret) {
goto out;
}
index = qed_l2_index(s, find_cluster_cb->pos);
n = qed_bytes_to_clusters(s,
qed_offset_into_cluster(s, find_cluster_cb->pos) +
find_cluster_cb->len);
index = qed_l2_index(s, pos);
n = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, pos) + *len);
n = qed_count_contiguous_clusters(s, request->l2_table->table,
index, n, &offset);
@@ -105,64 +133,10 @@ static void qed_find_cluster_cb(void *opaque, int ret)
ret = -EINVAL;
}
len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
qed_offset_into_cluster(s, find_cluster_cb->pos));
*len = MIN(*len,
n * s->header.cluster_size - qed_offset_into_cluster(s, pos));
out:
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
qed_release(s);
g_free(find_cluster_cb);
}
/**
* Find the offset of a data cluster
*
* @s: QED state
* @request: L2 cache entry
* @pos: Byte position in device
* @len: Number of bytes
* @cb: Completion function
* @opaque: User data for completion function
*
* This function translates a position in the block device to an offset in the
* image file. It invokes the cb completion callback to report back the
* translated offset or unallocated range in the image file.
*
* If the L2 table exists, request->l2_table points to the L2 table cache entry
* and the caller must free the reference when they are finished. The cache
* entry is exposed in this way to avoid callers having to read the L2 table
* again later during request processing. If request->l2_table is non-NULL it
* will be unreferenced before taking on the new cache entry.
*/
void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
size_t len, QEDFindClusterFunc *cb, void *opaque)
{
QEDFindClusterCB *find_cluster_cb;
uint64_t l2_offset;
/* Limit length to L2 boundary. Requests are broken up at the L2 boundary
* so that a request acts on one L2 table at a time.
*/
len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
if (qed_offset_is_unalloc_cluster(l2_offset)) {
cb(opaque, QED_CLUSTER_L1, 0, len);
return;
}
if (!qed_check_table_offset(s, l2_offset)) {
cb(opaque, -EINVAL, 0, 0);
return;
}
find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
find_cluster_cb->s = s;
find_cluster_cb->pos = pos;
find_cluster_cb->len = len;
find_cluster_cb->cb = cb;
find_cluster_cb->opaque = opaque;
find_cluster_cb->request = request;
qed_read_l2_table(s, request, l2_offset,
qed_find_cluster_cb, find_cluster_cb);
*img_offset = offset;
return ret;
}

View File

@@ -1,33 +0,0 @@
/*
* QEMU Enhanced Disk Format
*
* Copyright IBM, Corp. 2010
*
* Authors:
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "qed.h"
/**
 * Allocate a callback context and record the completion function in it
 *
 * @len: Number of bytes to allocate
 * @cb: Completion function to store
 * @opaque: Argument to store for the completion function
 *
 * The allocation is accessed as a GenericCB, so @len has to be at least
 * sizeof(GenericCB); users in qed-table.c embed a GenericCB as the first
 * member of a larger structure and pass that structure's size.
 *
 * Returns: the new context. Only the cb and opaque fields are set here; any
 * remaining bytes are left for the caller to fill in.
 */
void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
{
GenericCB *gencb = g_malloc(len);
gencb->cb = cb;
gencb->opaque = opaque;
return gencb;
}
/**
 * Free a callback context and invoke the completion function it stored
 *
 * @opaque: Callback context, interpreted as a GenericCB
 * @ret: Result code handed on to the completion function
 *
 * The context is freed before the completion function runs, so the
 * completion function never sees the context itself, only the user's
 * opaque pointer.
 */
void gencb_complete(void *opaque, int ret)
{
GenericCB *gencb = opaque;
/* Copy both fields out first: gencb must not be read after g_free() */
BlockCompletionFunc *cb = gencb->cb;
void *user_opaque = gencb->opaque;
g_free(gencb);
cb(user_opaque, ret);
}

View File

@@ -101,6 +101,8 @@ CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
/**
* Decrease an entry's reference count and free if necessary when the reference
* count drops to zero.
*
* Called with table_lock held.
*/
void qed_unref_l2_cache_entry(CachedL2Table *entry)
{
@@ -122,6 +124,8 @@ void qed_unref_l2_cache_entry(CachedL2Table *entry)
*
* For a cached entry, this function increases the reference count and returns
* the entry.
*
* Called with table_lock held.
*/
CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
{
@@ -150,6 +154,8 @@ CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
* N.B. This function steals a reference to the l2_table from the caller so the
* caller must obtain a new reference by issuing a call to
* qed_find_l2_cache_entry().
*
* Called with table_lock held.
*/
void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
{

View File

@@ -18,99 +18,43 @@
#include "qed.h"
#include "qemu/bswap.h"
typedef struct {
GenericCB gencb;
BDRVQEDState *s;
QEDTable *table;
struct iovec iov;
QEMUIOVector qiov;
} QEDReadTableCB;
static void qed_read_table_cb(void *opaque, int ret)
/* Called either from qed_check or with table_lock held. */
static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
{
QEDReadTableCB *read_table_cb = opaque;
QEDTable *table = read_table_cb->table;
BDRVQEDState *s = read_table_cb->s;
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
int i;
QEMUIOVector qiov;
int noffsets;
int i, ret;
/* Handle I/O error */
if (ret) {
struct iovec iov = {
.iov_base = table->offsets,
.iov_len = s->header.cluster_size * s->header.table_size,
};
qemu_iovec_init_external(&qiov, &iov, 1);
trace_qed_read_table(s, offset, table);
if (qemu_in_coroutine()) {
qemu_co_mutex_unlock(&s->table_lock);
}
ret = bdrv_preadv(s->bs->file, offset, &qiov);
if (qemu_in_coroutine()) {
qemu_co_mutex_lock(&s->table_lock);
}
if (ret < 0) {
goto out;
}
/* Byteswap offsets */
qed_acquire(s);
noffsets = qiov.size / sizeof(uint64_t);
for (i = 0; i < noffsets; i++) {
table->offsets[i] = le64_to_cpu(table->offsets[i]);
}
qed_release(s);
ret = 0;
out:
/* Completion */
trace_qed_read_table_cb(s, read_table_cb->table, ret);
gencb_complete(&read_table_cb->gencb, ret);
}
static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
BlockCompletionFunc *cb, void *opaque)
{
QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
cb, opaque);
QEMUIOVector *qiov = &read_table_cb->qiov;
trace_qed_read_table(s, offset, table);
read_table_cb->s = s;
read_table_cb->table = table;
read_table_cb->iov.iov_base = table->offsets,
read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
}
typedef struct {
GenericCB gencb;
BDRVQEDState *s;
QEDTable *orig_table;
QEDTable *table;
bool flush; /* flush after write? */
struct iovec iov;
QEMUIOVector qiov;
} QEDWriteTableCB;
static void qed_write_table_cb(void *opaque, int ret)
{
QEDWriteTableCB *write_table_cb = opaque;
BDRVQEDState *s = write_table_cb->s;
trace_qed_write_table_cb(s,
write_table_cb->orig_table,
write_table_cb->flush,
ret);
if (ret) {
goto out;
}
if (write_table_cb->flush) {
/* We still need to flush first */
write_table_cb->flush = false;
qed_acquire(s);
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
write_table_cb);
qed_release(s);
return;
}
out:
qemu_vfree(write_table_cb->table);
gencb_complete(&write_table_cb->gencb, ret);
trace_qed_read_table_cb(s, table, ret);
return ret;
}
/**
@@ -122,17 +66,19 @@ out:
* @index: Index of first element
* @n: Number of elements
* @flush: Whether or not to sync to disk
* @cb: Completion function
* @opaque: Argument for completion function
*
* Called either from qed_check or with table_lock held.
*/
static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
unsigned int index, unsigned int n, bool flush,
BlockCompletionFunc *cb, void *opaque)
static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
unsigned int index, unsigned int n, bool flush)
{
QEDWriteTableCB *write_table_cb;
unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
unsigned int start, end, i;
QEDTable *new_table;
struct iovec iov;
QEMUIOVector qiov;
size_t len_bytes;
int ret;
trace_qed_write_table(s, offset, table, index, n);
@@ -142,157 +88,120 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
len_bytes = (end - start) * sizeof(uint64_t);
write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
write_table_cb->s = s;
write_table_cb->orig_table = table;
write_table_cb->flush = flush;
write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
write_table_cb->iov.iov_base = write_table_cb->table->offsets;
write_table_cb->iov.iov_len = len_bytes;
qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
new_table = qemu_blockalign(s->bs, len_bytes);
iov = (struct iovec) {
.iov_base = new_table->offsets,
.iov_len = len_bytes,
};
qemu_iovec_init_external(&qiov, &iov, 1);
/* Byteswap table */
for (i = start; i < end; i++) {
uint64_t le_offset = cpu_to_le64(table->offsets[i]);
write_table_cb->table->offsets[i - start] = le_offset;
new_table->offsets[i - start] = le_offset;
}
/* Adjust for offset into table */
offset += start * sizeof(uint64_t);
bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
&write_table_cb->qiov,
write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_write_table_cb, write_table_cb);
}
if (qemu_in_coroutine()) {
qemu_co_mutex_unlock(&s->table_lock);
}
ret = bdrv_pwritev(s->bs->file, offset, &qiov);
if (qemu_in_coroutine()) {
qemu_co_mutex_lock(&s->table_lock);
}
trace_qed_write_table_cb(s, table, flush, ret);
if (ret < 0) {
goto out;
}
/**
* Propagate return value from async callback
*/
static void qed_sync_cb(void *opaque, int ret)
{
*(int *)opaque = ret;
if (flush) {
ret = bdrv_flush(s->bs);
if (ret < 0) {
goto out;
}
}
ret = 0;
out:
qemu_vfree(new_table);
return ret;
}
int qed_read_l1_table_sync(BDRVQEDState *s)
{
int ret = -EINPROGRESS;
qed_read_table(s, s->header.l1_table_offset,
s->l1_table, qed_sync_cb, &ret);
BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
return ret;
return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
}
void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
BlockCompletionFunc *cb, void *opaque)
/* Called either from qed_check or with table_lock held. */
int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
{
BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
qed_write_table(s, s->header.l1_table_offset,
s->l1_table, index, n, false, cb, opaque);
return qed_write_table(s, s->header.l1_table_offset,
s->l1_table, index, n, false);
}
int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
unsigned int n)
{
int ret = -EINPROGRESS;
qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
return ret;
return qed_write_l1_table(s, index, n);
}
typedef struct {
GenericCB gencb;
BDRVQEDState *s;
uint64_t l2_offset;
QEDRequest *request;
} QEDReadL2TableCB;
static void qed_read_l2_table_cb(void *opaque, int ret)
/* Called either from qed_check or with table_lock held. */
int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
{
QEDReadL2TableCB *read_l2_table_cb = opaque;
QEDRequest *request = read_l2_table_cb->request;
BDRVQEDState *s = read_l2_table_cb->s;
CachedL2Table *l2_table = request->l2_table;
uint64_t l2_offset = read_l2_table_cb->l2_offset;
qed_acquire(s);
if (ret) {
/* can't trust loaded L2 table anymore */
qed_unref_l2_cache_entry(l2_table);
request->l2_table = NULL;
} else {
l2_table->offset = l2_offset;
qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
/* This is guaranteed to succeed because we just committed the entry
* to the cache.
*/
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(request->l2_table != NULL);
}
qed_release(s);
gencb_complete(&read_l2_table_cb->gencb, ret);
}
void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
BlockCompletionFunc *cb, void *opaque)
{
QEDReadL2TableCB *read_l2_table_cb;
int ret;
qed_unref_l2_cache_entry(request->l2_table);
/* Check for cached L2 entry */
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
if (request->l2_table) {
cb(opaque, 0);
return;
return 0;
}
request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
request->l2_table->table = qed_alloc_table(s);
read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
read_l2_table_cb->s = s;
read_l2_table_cb->l2_offset = offset;
read_l2_table_cb->request = request;
BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
qed_read_table(s, offset, request->l2_table->table,
qed_read_l2_table_cb, read_l2_table_cb);
}
ret = qed_read_table(s, offset, request->l2_table->table);
int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
{
int ret = -EINPROGRESS;
if (ret) {
/* can't trust loaded L2 table anymore */
qed_unref_l2_cache_entry(request->l2_table);
request->l2_table = NULL;
} else {
request->l2_table->offset = offset;
qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
qed_commit_l2_cache_entry(&s->l2_cache, request->l2_table);
/* This is guaranteed to succeed because we just committed the entry
* to the cache.
*/
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
assert(request->l2_table != NULL);
}
return ret;
}
void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
unsigned int index, unsigned int n, bool flush,
BlockCompletionFunc *cb, void *opaque)
int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
{
return qed_read_l2_table(s, request, offset);
}
/* Called either from qed_check or with table_lock held. */
int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
unsigned int index, unsigned int n, bool flush)
{
BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
qed_write_table(s, request->l2_table->offset,
request->l2_table->table, index, n, flush, cb, opaque);
return qed_write_table(s, request->l2_table->offset,
request->l2_table->table, index, n, flush);
}
int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
unsigned int index, unsigned int n, bool flush)
{
int ret = -EINPROGRESS;
qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
return ret;
return qed_write_l2_table(s, request, index, n, flush);
}

File diff suppressed because it is too large Load Diff

View File

@@ -129,8 +129,7 @@ enum {
};
typedef struct QEDAIOCB {
BlockAIOCB common;
int bh_ret; /* final return status for completion bh */
BlockDriverState *bs;
QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
int flags; /* QED_AIOCB_* bits ORed together */
uint64_t end_pos; /* request end on block device, in bytes */
@@ -152,18 +151,25 @@ typedef struct QEDAIOCB {
typedef struct {
BlockDriverState *bs; /* device */
uint64_t file_size; /* length of image file, in bytes */
/* Written only by an allocating write or the timer handler (the latter
* while allocating reqs are plugged).
*/
QEDHeader header; /* always cpu-endian */
/* Protected by table_lock. */
CoMutex table_lock;
QEDTable *l1_table;
L2TableCache l2_cache; /* l2 table cache */
uint32_t table_nelems;
uint32_t l1_shift;
uint32_t l2_shift;
uint32_t l2_mask;
uint64_t file_size; /* length of image file, in bytes */
/* Allocating write request queue */
QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
QEDAIOCB *allocating_acb;
CoQueue allocating_write_reqs;
bool allocating_write_reqs_plugged;
/* Periodic flush and clear need check flag */
@@ -177,41 +183,6 @@ enum {
QED_CLUSTER_L1, /* cluster missing in L1 */
};
/**
* qed_find_cluster() completion callback
*
* @opaque: User data for completion callback
* @ret: QED_CLUSTER_FOUND Success
* QED_CLUSTER_L2 Data cluster unallocated in L2
* QED_CLUSTER_L1 L2 unallocated in L1
* -errno POSIX error occurred
* @offset: Data cluster offset
* @len: Contiguous bytes starting from cluster offset
*
* This function is invoked when qed_find_cluster() completes.
*
* On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
* in the image file.
*
* On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
* table offset, respectively. len is number of contiguous unallocated bytes.
*/
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
void qed_acquire(BDRVQEDState *s);
void qed_release(BDRVQEDState *s);
/**
* Generic callback for chaining async callbacks
*/
typedef struct {
BlockCompletionFunc *cb;
void *opaque;
} GenericCB;
void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque);
void gencb_complete(void *opaque, int ret);
/**
* Header functions
*/
@@ -231,25 +202,23 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
* Table I/O functions
*/
int qed_read_l1_table_sync(BDRVQEDState *s);
void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
BlockCompletionFunc *cb, void *opaque);
int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n);
int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
unsigned int n);
int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
uint64_t offset);
void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
BlockCompletionFunc *cb, void *opaque);
void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
unsigned int index, unsigned int n, bool flush,
BlockCompletionFunc *cb, void *opaque);
int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
unsigned int index, unsigned int n, bool flush);
int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
unsigned int index, unsigned int n, bool flush);
/**
* Cluster functions
*/
void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
size_t len, QEDFindClusterFunc *cb, void *opaque);
int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
uint64_t pos, size_t *len,
uint64_t *img_offset);
/**
* Consistency check

View File

@@ -19,7 +19,6 @@
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qlist.h"
#include "qapi/qmp/qstring.h"

View File

@@ -259,12 +259,12 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
*pnum = nb_sectors;
*file = bs->file->bs;
sector_num += s->offset / BDRV_SECTOR_SIZE;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count,
int64_t offset, int bytes,
BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
@@ -272,18 +272,18 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
return -EINVAL;
}
offset += s->offset;
return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
int64_t offset, int bytes)
{
BDRVRawState *s = bs->opaque;
if (offset > UINT64_MAX - s->offset) {
return -EINVAL;
}
offset += s->offset;
return bdrv_co_pdiscard(bs->file->bs, offset, count);
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
}
static int64_t raw_getlength(BlockDriverState *bs)
@@ -312,6 +312,31 @@ static int64_t raw_getlength(BlockDriverState *bs)
return s->size;
}
/*
 * Report how much space a raw image needs.
 *
 * When @in_bs is given, the figure is the length reported by
 * bdrv_getlength() for it.  Otherwise it is the BLOCK_OPT_SIZE option
 * taken (and removed) from @opts, rounded up to BDRV_SECTOR_SIZE.
 *
 * Returns a newly allocated BlockMeasureInfo, or NULL with @errp set if
 * the length of @in_bs could not be obtained.
 */
static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
                                     Error **errp)
{
    BlockMeasureInfo *result;
    int64_t bytes_needed;

    if (!in_bs) {
        /* No source image: size comes from the creation options */
        bytes_needed = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                                BDRV_SECTOR_SIZE);
    } else {
        bytes_needed = bdrv_getlength(in_bs);
        if (bytes_needed < 0) {
            /* A negative length is a -errno value */
            error_setg_errno(errp, -bytes_needed, "Unable to get image size");
            return NULL;
        }
    }

    result = g_new(BlockMeasureInfo, 1);
    result->required = bytes_needed;

    /* Unallocated sectors count towards the file size in raw images */
    result->fully_allocated = result->required;

    return result;
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
return bdrv_get_info(bs->file->bs, bdi);
@@ -327,7 +352,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
}
}
static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
static int raw_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
BDRVRawState *s = bs->opaque;
@@ -343,7 +369,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
s->size = offset;
offset += s->offset;
return bdrv_truncate(bs->file, offset, errp);
return bdrv_truncate(bs->file, offset, prealloc, errp);
}
static int raw_media_changed(BlockDriverState *bs)
@@ -479,6 +505,7 @@ BlockDriver bdrv_raw = {
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
.bdrv_measure = &raw_measure,
.bdrv_get_info = &raw_get_info,
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_probe_blocksizes = &raw_probe_blocksizes,

View File

@@ -340,6 +340,10 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Legacy rados key/value option parameters",
},
{
.name = "filename",
.type = QEMU_OPT_STRING,
},
{ /* end of list */ }
},
};
@@ -541,12 +545,27 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
{
BDRVRBDState *s = bs->opaque;
const char *pool, *snap, *conf, *user, *image_name, *keypairs;
const char *secretid;
const char *secretid, *filename;
QemuOpts *opts;
Error *local_err = NULL;
char *mon_host = NULL;
int r;
/* If we are given a filename, parse the filename, with precedence given to
* filename encoded options */
filename = qdict_get_try_str(options, "filename");
if (filename) {
warn_report("'filename' option specified. "
"This is an unsupported option, and may be deprecated "
"in the future");
qemu_rbd_parse_filename(filename, options, &local_err);
if (local_err) {
r = -EINVAL;
error_propagate(errp, local_err);
goto exit;
}
}
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -665,6 +684,7 @@ failed_shutdown:
failed_opts:
qemu_opts_del(opts);
g_free(mon_host);
exit:
return r;
}
@@ -916,11 +936,18 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
return info.size;
}
static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
BDRVRBDState *s = bs->opaque;
int r;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
r = rbd_resize(s->image, offset);
if (r < 0) {
error_setg_errno(errp, -r, "Failed to resize file");
@@ -1045,11 +1072,11 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
#ifdef LIBRBD_SUPPORTS_DISCARD
static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
int64_t offset,
int count,
int bytes,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
RBD_AIO_DISCARD);
}
#endif

View File

@@ -234,10 +234,14 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs,
}
if (job) {
backup_wait_for_overlapping_requests(child->bs->job, sector_num,
remaining_sectors);
backup_cow_request_begin(&req, child->bs->job, sector_num,
remaining_sectors);
uint64_t remaining_bytes = remaining_sectors * BDRV_SECTOR_SIZE;
backup_wait_for_overlapping_requests(child->bs->job,
sector_num * BDRV_SECTOR_SIZE,
remaining_bytes);
backup_cow_request_begin(&req, child->bs->job,
sector_num * BDRV_SECTOR_SIZE,
remaining_bytes);
ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors,
qiov);
backup_cow_request_end(&req);
@@ -260,7 +264,8 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
BdrvChild *top = bs->file;
BdrvChild *base = s->secondary_disk;
BdrvChild *target;
int ret, n;
int ret;
int64_t n;
ret = replication_get_io_status(s);
if (ret < 0) {
@@ -279,14 +284,20 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
*/
qemu_iovec_init(&hd_qiov, qiov->niov);
while (remaining_sectors > 0) {
ret = bdrv_is_allocated_above(top->bs, base->bs, sector_num,
remaining_sectors, &n);
int64_t count;
ret = bdrv_is_allocated_above(top->bs, base->bs,
sector_num * BDRV_SECTOR_SIZE,
remaining_sectors * BDRV_SECTOR_SIZE,
&count);
if (ret < 0) {
goto out1;
}
assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
n = count >> BDRV_SECTOR_BITS;
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, n * BDRV_SECTOR_SIZE);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);
target = ret ? top : base;
ret = bdrv_co_writev(target, sector_num, n, &hd_qiov);
@@ -296,7 +307,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
remaining_sectors -= n;
sector_num += n;
bytes_done += n * BDRV_SECTOR_SIZE;
bytes_done += count;
}
out1:

View File

@@ -16,7 +16,6 @@
#include "qapi-visit.h"
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qint.h"
#include "qapi/qobject-input-visitor.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
@@ -391,6 +390,7 @@ struct BDRVSheepdogState {
QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
CoMutex queue_lock;
CoQueue overlapping_queue;
QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
};
@@ -489,7 +489,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
retry:
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
if (AIOCBOverlapping(acb, cb)) {
qemu_co_queue_wait(&s->overlapping_queue, NULL);
qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock);
goto retry;
}
}
@@ -526,8 +526,10 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
return;
}
qemu_co_mutex_lock(&s->queue_lock);
wait_for_overlapping_aiocb(s, acb);
QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
qemu_co_mutex_unlock(&s->queue_lock);
}
static SocketAddress *sd_socket_address(const char *path,
@@ -698,7 +700,8 @@ out:
srco->co = NULL;
srco->ret = ret;
srco->finished = true;
/* Set srco->finished before reading bs->wakeup. */
atomic_mb_set(&srco->finished, true);
if (srco->bs) {
bdrv_wakeup(srco->bs);
}
@@ -785,6 +788,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
* have to move all the inflight requests to the failed queue before
* resend_aioreq() is called.
*/
qemu_co_mutex_lock(&s->queue_lock);
QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
@@ -794,8 +798,11 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
while (!QLIST_EMPTY(&s->failed_aio_head)) {
aio_req = QLIST_FIRST(&s->failed_aio_head);
QLIST_REMOVE(aio_req, aio_siblings);
qemu_co_mutex_unlock(&s->queue_lock);
resend_aioreq(s, aio_req);
qemu_co_mutex_lock(&s->queue_lock);
}
qemu_co_mutex_unlock(&s->queue_lock);
}
/*
@@ -887,7 +894,10 @@ static void coroutine_fn aio_read_response(void *opaque)
*/
s->co_recv = NULL;
qemu_co_mutex_lock(&s->queue_lock);
QLIST_REMOVE(aio_req, aio_siblings);
qemu_co_mutex_unlock(&s->queue_lock);
switch (rsp.result) {
case SD_RES_SUCCESS:
break;
@@ -1046,11 +1056,11 @@ static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
}
/* transport */
if (!strcmp(uri->scheme, "sheepdog")) {
if (!g_strcmp0(uri->scheme, "sheepdog")) {
is_unix = false;
} else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
} else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
is_unix = false;
} else if (!strcmp(uri->scheme, "sheepdog+unix")) {
} else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
is_unix = true;
} else {
error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp',"
@@ -1307,7 +1317,9 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
uint64_t old_oid = aio_req->base_oid;
bool create = aio_req->create;
qemu_co_mutex_lock(&s->queue_lock);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
qemu_co_mutex_unlock(&s->queue_lock);
if (!nr_copies) {
error_report("bug");
@@ -1678,6 +1690,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
pstrcpy(s->name, sizeof(s->name), vdi);
qemu_co_mutex_init(&s->lock);
qemu_co_mutex_init(&s->queue_lock);
qemu_co_queue_init(&s->overlapping_queue);
qemu_opts_del(opts);
g_free(buf);
@@ -2153,13 +2166,20 @@ static int64_t sd_getlength(BlockDriverState *bs)
return s->inode.vdi_size;
}
static int sd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
static int sd_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
BDRVSheepdogState *s = bs->opaque;
int ret, fd;
unsigned int datalen;
uint64_t max_vdi_size;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
if (offset < s->inode.vdi_size) {
error_setg(errp, "shrinking is not supported");
@@ -2431,12 +2451,16 @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
static void sd_aio_complete(SheepdogAIOCB *acb)
{
BDRVSheepdogState *s;
if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
return;
}
s = acb->s;
qemu_co_mutex_lock(&s->queue_lock);
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&acb->s->overlapping_queue);
qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_co_mutex_unlock(&s->queue_lock);
}
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -2448,7 +2472,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
BDRVSheepdogState *s = bs->opaque;
if (offset > s->inode.vdi_size) {
ret = sd_truncate(bs, offset, NULL);
ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
if (ret < 0) {
return ret;
}
@@ -2935,7 +2959,7 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
int count)
int bytes)
{
SheepdogAIOCB acb;
BDRVSheepdogState *s = bs->opaque;
@@ -2953,11 +2977,11 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
iov.iov_len = sizeof(zero);
discard_iov.iov = &iov;
discard_iov.niov = 1;
if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) {
if (!QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)) {
return -ENOTSUP;
}
sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
bytes >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
sd_co_rw_vector(&acb);
sd_aio_complete(&acb);

View File

@@ -34,7 +34,6 @@
#include "qemu/sockets.h"
#include "qemu/uri.h"
#include "qapi-visit.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi/qobject-output-visitor.h"
@@ -205,7 +204,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
return -EINVAL;
}
if (strcmp(uri->scheme, "ssh") != 0) {
if (g_strcmp0(uri->scheme, "ssh") != 0) {
error_setg(errp, "URI scheme must be 'ssh'");
goto err;
}
@@ -889,13 +888,22 @@ static int ssh_has_zero_init(BlockDriverState *bs)
return has_zero_init;
}
/*
 * Opaque argument passed to the fd handlers registered by co_yield() and
 * consumed by restart_coroutine().
 */
typedef struct BDRVSSHRestart {
/* Block device whose AioContext and socket the fd handler was set on */
BlockDriverState *bs;
/* Coroutine that yielded and is woken by restart_coroutine() */
Coroutine *co;
} BDRVSSHRestart;
static void restart_coroutine(void *opaque)
{
Coroutine *co = opaque;
BDRVSSHRestart *restart = opaque;
BlockDriverState *bs = restart->bs;
BDRVSSHState *s = bs->opaque;
AioContext *ctx = bdrv_get_aio_context(bs);
DPRINTF("co=%p", co);
DPRINTF("co=%p", restart->co);
aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL);
aio_co_wake(co);
aio_co_wake(restart->co);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -906,7 +914,10 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
IOHandler *rd_handler = NULL, *wr_handler = NULL;
Coroutine *co = qemu_coroutine_self();
BDRVSSHRestart restart = {
.bs = bs,
.co = qemu_coroutine_self()
};
r = libssh2_session_block_directions(s->session);
@@ -921,11 +932,9 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
rd_handler, wr_handler);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, rd_handler, wr_handler, NULL, co);
false, rd_handler, wr_handler, NULL, &restart);
qemu_coroutine_yield();
DPRINTF("s->sock=%d - back", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
NULL, NULL, NULL, NULL);
}
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
@@ -1115,8 +1124,8 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
{
if (!s->unsafe_flush_warning) {
error_report("warning: ssh server %s does not support fsync",
s->inet->host);
warn_report("ssh server %s does not support fsync",
s->inet->host);
if (what) {
error_report("to support fsync, you need %s", what);
}

View File

@@ -41,25 +41,24 @@ typedef struct StreamBlockJob {
} StreamBlockJob;
static int coroutine_fn stream_populate(BlockBackend *blk,
int64_t sector_num, int nb_sectors,
int64_t offset, uint64_t bytes,
void *buf)
{
struct iovec iov = {
.iov_base = buf,
.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
.iov_len = bytes,
};
QEMUIOVector qiov;
assert(bytes < SIZE_MAX);
qemu_iovec_init_external(&qiov, &iov, 1);
/* Copy-on-read the unallocated clusters */
return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov,
BDRV_REQ_COPY_ON_READ);
return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
}
typedef struct {
int ret;
bool reached_end;
} StreamCompleteData;
static void stream_complete(BlockJob *job, void *opaque)
@@ -70,7 +69,7 @@ static void stream_complete(BlockJob *job, void *opaque)
BlockDriverState *base = s->base;
Error *local_err = NULL;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
if (!block_job_is_cancelled(&s->common) && bs->backing &&
data->ret == 0) {
const char *base_id = NULL, *base_fmt = NULL;
if (base) {
@@ -108,12 +107,11 @@ static void coroutine_fn stream_run(void *opaque)
BlockBackend *blk = s->common.blk;
BlockDriverState *bs = blk_bs(blk);
BlockDriverState *base = s->base;
int64_t sector_num = 0;
int64_t end = -1;
int64_t offset = 0;
uint64_t delay_ns = 0;
int error = 0;
int ret = 0;
int n = 0;
int64_t n = 0; /* bytes */
void *buf;
if (!bs->backing) {
@@ -126,7 +124,6 @@ static void coroutine_fn stream_run(void *opaque)
goto out;
}
end = s->common.len >> BDRV_SECTOR_BITS;
buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);
/* Turn on copy-on-read for the whole block device so that guest read
@@ -138,7 +135,7 @@ static void coroutine_fn stream_run(void *opaque)
bdrv_enable_copy_on_read(bs);
}
for (sector_num = 0; sector_num < end; sector_num += n) {
for ( ; offset < s->common.len; offset += n) {
bool copy;
/* Note that even when no rate limit is applied we need to yield
@@ -151,26 +148,25 @@ static void coroutine_fn stream_run(void *opaque)
copy = false;
ret = bdrv_is_allocated(bs, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n);
if (ret == 1) {
/* Allocated in the top, no need to copy. */
} else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [sector_num, sector_num+n). */
* known-unallocated area [offset, offset+n). */
ret = bdrv_is_allocated_above(backing_bs(bs), base,
sector_num, n, &n);
offset, n, &n);
/* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) {
n = end - sector_num;
n = s->common.len - offset;
}
copy = (ret == 1);
}
trace_stream_one_iteration(s, sector_num, n, ret);
trace_stream_one_iteration(s, offset, n, ret);
if (copy) {
ret = stream_populate(blk, sector_num, n, buf);
ret = stream_populate(blk, offset, n, buf);
}
if (ret < 0) {
BlockErrorAction action =
@@ -189,7 +185,7 @@ static void coroutine_fn stream_run(void *opaque)
ret = 0;
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
s->common.offset += n;
if (copy && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
}
@@ -208,7 +204,6 @@ out:
/* Modify backing chain and close BDSes in main loop */
data = g_malloc(sizeof(*data));
data->ret = ret;
data->reached_end = sector_num == end;
block_job_defer_to_main_loop(&s->common, stream_complete, data);
}
@@ -220,7 +215,7 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
}
static const BlockJobDriver stream_job_driver = {
@@ -280,6 +275,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
fail:
if (orig_bs_flags != bdrv_get_flags(bs)) {
bdrv_reopen(bs, s->bs_flags, NULL);
bdrv_reopen(bs, orig_bs_flags, NULL);
}
}

View File

@@ -49,7 +49,7 @@
* Again, all this is handled internally and is mostly transparent to
* the outside. The 'throttle_timers' field however has an additional
* constraint because it may be temporarily invalid (see for example
* bdrv_set_aio_context()). Therefore in this file a thread will
* blk_set_aio_context()). Therefore in this file a thread will
* access some other BlockBackend's timers only after verifying that
* that BlockBackend has throttled requests in the queue.
*/
@@ -61,6 +61,7 @@ typedef struct ThrottleGroup {
QLIST_HEAD(, BlockBackendPublic) head;
BlockBackend *tokens[2];
bool any_timer_armed[2];
QEMUClockType clock_type;
/* These two are protected by the global throttle_groups_lock */
unsigned refcount;
@@ -98,6 +99,12 @@ ThrottleState *throttle_group_incref(const char *name)
if (!tg) {
tg = g_new0(ThrottleGroup, 1);
tg->name = g_strdup(name);
tg->clock_type = QEMU_CLOCK_REALTIME;
if (qtest_enabled()) {
/* For testing block IO throttling only */
tg->clock_type = QEMU_CLOCK_VIRTUAL;
}
qemu_mutex_init(&tg->lock);
throttle_init(&tg->ts);
QLIST_INIT(&tg->head);
@@ -240,7 +247,7 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool must_wait;
if (blkp->io_limits_disabled) {
if (atomic_read(&blkp->io_limits_disabled)) {
return false;
}
@@ -260,6 +267,25 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
return must_wait;
}
/* Restart the next throttled I/O request queued on a BlockBackend.
 *
 * The queue is manipulated with the BlockBackend's throttled_reqs_lock
 * held.
 *
 * @blk:      the current BlockBackend
 * @is_write: the type of operation (read/write)
 * @ret:      true if a request was pending and has been restarted,
 *            false if the queue was empty
 */
static bool coroutine_fn throttle_group_co_restart_queue(BlockBackend *blk,
                                                         bool is_write)
{
    BlockBackendPublic *pub = blk_get_public(blk);
    bool had_pending;

    qemu_co_mutex_lock(&pub->throttled_reqs_lock);
    had_pending = qemu_co_queue_next(&pub->throttled_reqs[is_write]);
    qemu_co_mutex_unlock(&pub->throttled_reqs_lock);

    return had_pending;
}
/* Look for the next pending I/O request and schedule it.
*
* This assumes that tg->lock is held.
@@ -287,12 +313,12 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
if (!must_wait) {
/* Give preference to requests from the current blk */
if (qemu_in_coroutine() &&
qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
throttle_group_co_restart_queue(blk, is_write)) {
token = blk;
} else {
ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
int64_t now = qemu_clock_get_ns(tt->clock_type);
timer_mod(tt->timers[is_write], now + 1);
int64_t now = qemu_clock_get_ns(tg->clock_type);
timer_mod(tt->timers[is_write], now);
tg->any_timer_armed[is_write] = true;
}
tg->tokens[is_write] = token;
@@ -326,7 +352,10 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write],
&blkp->throttled_reqs_lock);
qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
@@ -340,15 +369,50 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
qemu_mutex_unlock(&tg->lock);
}
/* Arguments for throttle_group_restart_queue_entry().
 *
 * An instance is allocated on the heap by throttle_group_restart_queue()
 * and freed by the coroutine entry point, so it stays valid even if the
 * coroutine is first entered after the creator has returned.
 */
typedef struct {
    BlockBackend *blk;
    bool is_write;
} RestartData;

/* Coroutine entry point: restart the next throttled request of a
 * BlockBackend, or schedule a request elsewhere in the group if this
 * BlockBackend had nothing queued.
 *
 * @opaque: heap-allocated RestartData; ownership passes to this function
 */
static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
{
    RestartData *data = opaque;
    BlockBackend *blk = data->blk;
    bool is_write = data->is_write;
    BlockBackendPublic *blkp = blk_get_public(blk);
    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
    bool empty_queue;

    /* Both fields have been copied out; release the argument block now */
    g_free(data);

    empty_queue = !throttle_group_co_restart_queue(blk, is_write);

    /* If the request queue was empty then we have to take care of
     * scheduling the next one */
    if (empty_queue) {
        qemu_mutex_lock(&tg->lock);
        schedule_next_request(blk, is_write);
        qemu_mutex_unlock(&tg->lock);
    }
}

/* Spawn a coroutine that restarts the read or write queue of a
 * BlockBackend in the BlockBackend's AioContext.
 *
 * @blk:      the BlockBackend whose queue is restarted
 * @is_write: the type of operation (read/write)
 */
static void throttle_group_restart_queue(BlockBackend *blk, bool is_write)
{
    Coroutine *co;
    /* The arguments must not live on this stack frame: aio_co_enter() is
     * not guaranteed to run the coroutine before returning (it can queue
     * or schedule it instead), and the entry point dereferences them. */
    RestartData *rd = g_new0(RestartData, 1);

    rd->blk = blk;
    rd->is_write = is_write;

    co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd);
    aio_co_enter(blk_get_aio_context(blk), co);
}
void throttle_group_restart_blk(BlockBackend *blk)
{
BlockBackendPublic *blkp = blk_get_public(blk);
int i;
for (i = 0; i < 2; i++) {
while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
;
}
if (blkp->throttle_state) {
throttle_group_restart_queue(blk, 0);
throttle_group_restart_queue(blk, 1);
}
}
@@ -362,22 +426,13 @@ void throttle_group_restart_blk(BlockBackend *blk)
void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
{
BlockBackendPublic *blkp = blk_get_public(blk);
ThrottleTimers *tt = &blkp->throttle_timers;
ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
/* throttle_config() cancels the timers */
if (timer_pending(tt->timers[0])) {
tg->any_timer_armed[0] = false;
}
if (timer_pending(tt->timers[1])) {
tg->any_timer_armed[1] = false;
}
throttle_config(ts, tt, cfg);
throttle_config(ts, tg->clock_type, cfg);
qemu_mutex_unlock(&tg->lock);
qemu_co_enter_next(&blkp->throttled_reqs[0]);
qemu_co_enter_next(&blkp->throttled_reqs[1]);
throttle_group_restart_blk(blk);
}
/* Get the throttle configuration from a particular group. Similar to
@@ -408,7 +463,6 @@ static void timer_cb(BlockBackend *blk, bool is_write)
BlockBackendPublic *blkp = blk_get_public(blk);
ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool empty_queue;
/* The timer has just been fired, so we can update the flag */
qemu_mutex_lock(&tg->lock);
@@ -416,17 +470,7 @@ static void timer_cb(BlockBackend *blk, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
aio_context_acquire(blk_get_aio_context(blk));
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
aio_context_release(blk_get_aio_context(blk));
/* If the request queue was empty then we have to take care of
* scheduling the next one */
if (empty_queue) {
qemu_mutex_lock(&tg->lock);
schedule_next_request(blk, is_write);
qemu_mutex_unlock(&tg->lock);
}
throttle_group_restart_queue(blk, is_write);
}
static void read_timer_cb(void *opaque)
@@ -452,13 +496,6 @@ void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
BlockBackendPublic *blkp = blk_get_public(blk);
ThrottleState *ts = throttle_group_incref(groupname);
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
int clock_type = QEMU_CLOCK_REALTIME;
if (qtest_enabled()) {
/* For testing block IO throttling only */
clock_type = QEMU_CLOCK_VIRTUAL;
}
blkp->throttle_state = ts;
qemu_mutex_lock(&tg->lock);
@@ -473,7 +510,7 @@ void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
throttle_timers_init(&blkp->throttle_timers,
blk_get_aio_context(blk),
clock_type,
tg->clock_type,
read_timer_cb,
write_timer_cb,
blk);

View File

@@ -9,20 +9,17 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags
blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
# block/io.c
bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
# block/stream.c
stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"
stream_start(void *bs, void *base, void *s) "bs %p base %p s %p"
# block/commit.c
commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
commit_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"
commit_start(void *bs, void *base, void *top, void *s) "bs %p base %p top %p s %p"
# block/mirror.c
@@ -31,14 +28,14 @@ mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_flush(void *s) "s %p"
mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_sleep(void *s, int64_t cnt, int synced, uint64_t delay_ns) "s %p dirty count %"PRId64" synced %d delay %"PRIu64"ns"
mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
mirror_one_iteration(void *s, int64_t offset, uint64_t bytes) "s %p offset %" PRId64 " bytes %" PRIu64
mirror_iteration_done(void *s, int64_t offset, uint64_t bytes, int ret) "s %p offset %" PRId64 " bytes %" PRIu64 " ret %d"
mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
mirror_yield_in_flight(void *s, int64_t offset, int in_flight) "s %p offset %" PRId64 " in_flight %d"
# block/backup.c
backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d"
backup_do_cow_enter(void *job, int64_t start, int64_t offset, uint64_t bytes) "job %p start %" PRId64 " offset %" PRId64 " bytes %" PRIu64
backup_do_cow_return(void *job, int64_t offset, uint64_t bytes, int ret) "job %p offset %" PRId64 " bytes %" PRIu64 " ret %d"
backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"

View File

@@ -172,7 +172,7 @@ typedef struct {
/* VDI header (converted to host endianness). */
VdiHeader header;
CoMutex write_lock;
CoRwlock bmap_lock;
Error *migration_blocker;
} BDRVVdiState;
@@ -485,7 +485,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
goto fail_free_bmap;
}
qemu_co_mutex_init(&s->write_lock);
qemu_co_rwlock_init(&s->bmap_lock);
return 0;
@@ -557,7 +557,9 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
n_bytes, offset);
/* prepare next AIO request */
qemu_co_rwlock_rdlock(&s->bmap_lock);
bmap_entry = le32_to_cpu(s->bmap[block_index]);
qemu_co_rwlock_unlock(&s->bmap_lock);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
@@ -595,6 +597,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
uint32_t block_index;
uint32_t offset_in_block;
uint32_t n_bytes;
uint64_t data_offset;
uint32_t bmap_first = VDI_UNALLOCATED;
uint32_t bmap_last = VDI_UNALLOCATED;
uint8_t *block = NULL;
@@ -614,10 +617,19 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
n_bytes, offset);
/* prepare next AIO request */
qemu_co_rwlock_rdlock(&s->bmap_lock);
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
uint64_t data_offset;
qemu_co_rwlock_upgrade(&s->bmap_lock);
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (VDI_IS_ALLOCATED(bmap_entry)) {
/* A concurrent allocation did the work for us. */
qemu_co_rwlock_downgrade(&s->bmap_lock);
goto nonallocating_write;
}
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
@@ -635,30 +647,18 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
memset(block + offset_in_block + n_bytes, 0,
s->block_size - n_bytes - offset_in_block);
/* Note that this coroutine does not yield anywhere from reading the
* bmap entry until here, so in regards to all the coroutines trying
* to write to this cluster, the one doing the allocation will
* always be the first to try to acquire the lock.
* Therefore, it is also the first that will actually be able to
* acquire the lock and thus the padded cluster is written before
* the other coroutines can write to the affected area. */
qemu_co_mutex_lock(&s->write_lock);
/* Write the new block under CoRwLock write-side protection,
* so this full-cluster write does not overlap a partial write
* of the same cluster, issued from the "else" branch.
*/
ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
qemu_co_mutex_unlock(&s->write_lock);
qemu_co_rwlock_unlock(&s->bmap_lock);
} else {
uint64_t data_offset = s->header.offset_data +
(uint64_t)bmap_entry * s->block_size +
offset_in_block;
qemu_co_mutex_lock(&s->write_lock);
/* This lock is only used to make sure the following write operation
* is executed after the write issued by the coroutine allocating
* this cluster, therefore we do not need to keep it locked.
* As stated above, the allocating coroutine will always try to lock
* the mutex before all the other concurrent accesses to that
* cluster, therefore at this point we can be absolutely certain
* that that write operation has returned (there may be other writes
* in flight, but they do not concern this very operation). */
qemu_co_mutex_unlock(&s->write_lock);
nonallocating_write:
data_offset = s->header.offset_data +
(uint64_t)bmap_entry * s->block_size +
offset_in_block;
qemu_co_rwlock_unlock(&s->bmap_lock);
qemu_iovec_reset(&local_qiov);
qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
@@ -832,7 +832,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
if (image_type == VDI_TYPE_STATIC) {
ret = blk_truncate(blk, offset + blocks * block_size, errp);
ret = blk_truncate(blk, offset + blocks * block_size,
PREALLOC_MODE_OFF, errp);
if (ret < 0) {
error_prepend(errp, "Failed to statically allocate %s", filename);
goto exit;

View File

@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
bdrv_truncate(bs->file, new_file_size, NULL);
bdrv_truncate(bs->file, new_file_size, PREALLOC_MODE_OFF, NULL);
}
}
qemu_vfree(desc_entries);

View File

@@ -1171,7 +1171,8 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
/* per the spec, the address for a block is in units of 1MB */
*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
return bdrv_truncate(bs->file, *new_offset + s->block_size, NULL);
return bdrv_truncate(bs->file, *new_offset + s->block_size,
PREALLOC_MODE_OFF, NULL);
}
/*
@@ -1607,12 +1608,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
if (type == VHDX_TYPE_DYNAMIC) {
/* All zeroes, so we can just extend the file - the end of the BAT
* is the furthest thing we have written yet */
ret = blk_truncate(blk, data_file_offset, errp);
ret = blk_truncate(blk, data_file_offset, PREALLOC_MODE_OFF, errp);
if (ret < 0) {
goto exit;
}
} else if (type == VHDX_TYPE_FIXED) {
ret = blk_truncate(blk, data_file_offset + image_size, errp);
ret = blk_truncate(blk, data_file_offset + image_size,
PREALLOC_MODE_OFF, errp);
if (ret < 0) {
goto exit;
}

View File

@@ -242,10 +242,11 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
}
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
/* Return -ve errno, or 0 on success and write CID into *pcid. */
static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
{
char *desc;
uint32_t cid = 0xffffffff;
uint32_t cid;
const char *p_name, *cid_str;
size_t cid_str_size;
BDRVVmdkState *s = bs->opaque;
@@ -254,8 +255,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
g_free(desc);
return 0;
goto out;
}
if (parent) {
@@ -268,13 +268,21 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
desc[DESC_SIZE - 1] = '\0';
p_name = strstr(desc, cid_str);
if (p_name != NULL) {
p_name += cid_str_size;
sscanf(p_name, "%" SCNx32, &cid);
if (p_name == NULL) {
ret = -EINVAL;
goto out;
}
p_name += cid_str_size;
if (sscanf(p_name, "%" SCNx32, &cid) != 1) {
ret = -EINVAL;
goto out;
}
*pcid = cid;
ret = 0;
out:
g_free(desc);
return cid;
return ret;
}
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
@@ -322,7 +330,10 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
if (!s->cid_checked && bs->backing) {
BlockDriverState *p_bs = bs->backing->bs;
cur_pcid = vmdk_read_cid(p_bs, 0);
if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) {
/* read failure: report as not valid */
return 0;
}
if (s->parent_cid != cur_pcid) {
/* CID not valid */
return 0;
@@ -975,8 +986,14 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
if (ret) {
goto fail;
}
s->cid = vmdk_read_cid(bs, 0);
s->parent_cid = vmdk_read_cid(bs, 1);
ret = vmdk_read_cid(bs, 0, &s->cid);
if (ret) {
goto fail;
}
ret = vmdk_read_cid(bs, 1, &s->parent_cid);
if (ret) {
goto fail;
}
qemu_co_mutex_init(&s->lock);
/* Disable migration when VMDK images are used */
@@ -1714,7 +1731,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
blk_set_allow_write_beyond_eof(blk, true);
if (flat) {
ret = blk_truncate(blk, filesize, errp);
ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp);
goto exit;
}
magic = cpu_to_be32(VMDK4_MAGIC);
@@ -1777,7 +1794,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
goto exit;
}
ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, errp);
ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9,
PREALLOC_MODE_OFF, errp);
if (ret < 0) {
goto exit;
}
@@ -2007,8 +2025,11 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
ret = -EINVAL;
goto exit;
}
parent_cid = vmdk_read_cid(blk_bs(blk), 0);
ret = vmdk_read_cid(blk_bs(blk), 0, &parent_cid);
blk_unref(blk);
if (ret) {
goto exit;
}
snprintf(parent_desc_line, BUF_SIZE,
"parentFileNameHint=\"%s\"", backing_file);
}
@@ -2086,7 +2107,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
* for description file */
if (desc_offset == 0) {
ret = blk_truncate(new_blk, desc_len, errp);
ret = blk_truncate(new_blk, desc_len, PREALLOC_MODE_OFF, errp);
}
exit:
if (new_blk) {

View File

@@ -460,17 +460,23 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
/*
* Returns the absolute byte offset of the given sector in the image file.
* If the sector is not allocated, -1 is returned instead.
* If an error occurred trying to write an updated block bitmap back to
* the file, -2 is returned, and the error value is written to *err.
* This can only happen for a write operation.
*
* The parameter write must be 1 if the offset will be used for a write
* operation (the block bitmap is then updated), 0 otherwise.
* If write is true then err must not be NULL.
*/
static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
bool write)
bool write, int *err)
{
BDRVVPCState *s = bs->opaque;
uint64_t bitmap_offset, block_offset;
uint32_t pagetable_index, offset_in_block;
assert(!(write && err == NULL));
pagetable_index = offset / s->block_size;
offset_in_block = offset % s->block_size;
@@ -487,21 +493,20 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
correctness. */
if (write && (s->last_bitmap_offset != bitmap_offset)) {
uint8_t bitmap[s->bitmap_size];
int r;
s->last_bitmap_offset = bitmap_offset;
memset(bitmap, 0xff, s->bitmap_size);
bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
if (r < 0) {
*err = r;
return -2;
}
}
return block_offset;
}
static inline int64_t get_sector_offset(BlockDriverState *bs,
int64_t sector_num, bool write)
{
return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
}
/*
* Writes the footer to the end of the image file. This is needed when the
* file grows as it overwrites the old footer
@@ -567,7 +572,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
if (ret < 0)
goto fail;
return get_image_offset(bs, offset, false);
return get_image_offset(bs, offset, false, NULL);
fail:
s->free_data_block_offset -= (s->block_size + s->bitmap_size);
@@ -607,7 +612,7 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_init(&local_qiov, qiov->niov);
while (bytes > 0) {
image_offset = get_image_offset(bs, offset, false);
image_offset = get_image_offset(bs, offset, false, NULL);
n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
if (image_offset == -1) {
@@ -656,7 +661,11 @@ vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_init(&local_qiov, qiov->niov);
while (bytes > 0) {
image_offset = get_image_offset(bs, offset, true);
image_offset = get_image_offset(bs, offset, true, &ret);
if (image_offset == -2) {
/* Failed to write block bitmap: can't proceed with write */
goto fail;
}
n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
if (image_offset == -1) {
@@ -696,19 +705,23 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
VHDFooter *footer = (VHDFooter*) s->footer_buf;
int64_t start, offset;
bool allocated;
int64_t ret;
int n;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
*pnum = nb_sectors;
*file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
offset = get_sector_offset(bs, sector_num, 0);
qemu_co_mutex_lock(&s->lock);
offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL);
start = offset;
allocated = (offset != -1);
*pnum = 0;
ret = 0;
do {
/* All sectors in a block are contiguous (without using the bitmap) */
@@ -723,15 +736,18 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
* sectors since there is always a bitmap in between. */
if (allocated) {
*file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
break;
}
if (nb_sectors == 0) {
break;
}
offset = get_sector_offset(bs, sector_num, 0);
offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false,
NULL);
} while (offset == -1);
return 0;
qemu_co_mutex_unlock(&s->lock);
return ret;
}
/*
@@ -858,7 +874,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
/* Add footer to total size */
total_size += HEADER_SIZE;
ret = blk_truncate(blk, total_size, errp);
ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp);
if (ret < 0) {
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@@ -27,6 +27,10 @@ typedef struct NBDServerData {
static NBDServerData *nbd_server;
/* Callback handed to nbd_client_new() by nbd_accept(): it calls
 * nbd_client_put() on @client.  The bool argument is not used here. */
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
{
nbd_client_put(client);
}
static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
gpointer opaque)
@@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
nbd_client_new(NULL, cioc,
nbd_server->tlscreds, NULL,
nbd_client_put);
nbd_blockdev_client_closed);
object_unref(OBJECT(cioc));
return TRUE;
}

View File

@@ -50,6 +50,7 @@
#include "qmp-commands.h"
#include "block/trace.h"
#include "sysemu/arch_init.h"
#include "sysemu/qtest.h"
#include "qemu/cutils.h"
#include "qemu/help_option.h"
#include "qemu/throttle-options.h"
@@ -334,8 +335,9 @@ static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals,
break;
}
case QTYPE_QINT: {
int64_t length = qint_get_int(qobject_to_qint(entry->value));
case QTYPE_QNUM: {
int64_t length = qnum_get_int(qobject_to_qnum(entry->value));
if (length > 0 && length <= UINT_MAX) {
block_acct_add_interval(stats, (unsigned) length);
} else {
@@ -591,11 +593,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
bs->detect_zeroes = detect_zeroes;
if (bdrv_key_required(bs)) {
autostart = 0;
}
block_acct_init(blk_get_stats(blk), account_invalid, account_failed);
block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);
if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
blk_unref(blk);
@@ -797,6 +795,9 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
const char *filename;
Error *local_err = NULL;
int i;
const char *deprecated[] = {
"serial", "trans", "secs", "heads", "cyls", "addr"
};
/* Change legacy command line options into QMP ones */
static const struct {
@@ -880,6 +881,16 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
"update your scripts.\n");
}
/* Other deprecated options */
if (!qtest_enabled()) {
for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
if (qemu_opt_get(legacy_opts, deprecated[i]) != NULL) {
error_report("'%s' is deprecated, please use the corresponding "
"option of '-device' instead", deprecated[i]);
}
}
}
/* Media type */
value = qemu_opt_get(legacy_opts, "media");
if (value) {
@@ -899,7 +910,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false);
if (read_only && copy_on_read) {
error_report("warning: disabling copy-on-read on read-only drive");
warn_report("disabling copy-on-read on read-only drive");
copy_on_read = false;
}
@@ -1362,12 +1373,10 @@ out_aio_context:
static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
const char *name,
BlockDriverState **pbs,
AioContext **paio,
Error **errp)
{
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
AioContext *aio_context;
if (!node) {
error_setg(errp, "Node cannot be NULL");
@@ -1383,29 +1392,17 @@ static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
return NULL;
}
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
bitmap = bdrv_find_dirty_bitmap(bs, name);
if (!bitmap) {
error_setg(errp, "Dirty bitmap '%s' not found", name);
goto fail;
return NULL;
}
if (pbs) {
*pbs = bs;
}
if (paio) {
*paio = aio_context;
} else {
aio_context_release(aio_context);
}
return bitmap;
fail:
aio_context_release(aio_context);
return NULL;
}
/* New and old BlockDriverState structs for atomic group operations */
@@ -1713,7 +1710,8 @@ static void external_snapshot_prepare(BlkActionState *common,
}
flags = state->old_bs->open_flags;
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ);
flags |= BDRV_O_NO_BACKING;
/* create new image w/backing file */
mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1738,8 +1736,6 @@ static void external_snapshot_prepare(BlkActionState *common,
qdict_put_str(options, "node-name", snapshot_node_name);
}
qdict_put_str(options, "driver", format);
flags |= BDRV_O_NO_BACKING;
}
state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
@@ -1791,7 +1787,7 @@ static void external_snapshot_commit(BlkActionState *common)
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
if (!state->old_bs->copy_on_read) {
if (!atomic_read(&state->old_bs->copy_on_read)) {
bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
NULL);
}
@@ -1803,7 +1799,11 @@ static void external_snapshot_abort(BlkActionState *common)
DO_UPCAST(ExternalSnapshotState, common, common);
if (state->new_bs) {
if (state->overlay_appended) {
bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd()
close state->old_bs; we need it */
bdrv_set_backing_hd(state->new_bs, NULL, &error_abort);
bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */
}
}
}
@@ -1982,6 +1982,8 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
/* AIO context taken and released within qmp_block_dirty_bitmap_add */
qmp_block_dirty_bitmap_add(action->node, action->name,
action->has_granularity, action->granularity,
action->has_persistent, action->persistent,
action->has_autoload, action->autoload,
&local_err);
if (!local_err) {
@@ -2021,7 +2023,6 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
&state->bs,
&state->aio_context,
errp);
if (!state->bitmap) {
return;
@@ -2033,6 +2034,9 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
} else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) {
error_setg(errp, "Cannot clear a disabled bitmap");
return;
} else if (bdrv_dirty_bitmap_readonly(state->bitmap)) {
error_setg(errp, "Cannot clear a readonly bitmap");
return;
}
bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
@@ -2261,24 +2265,8 @@ void qmp_block_passwd(bool has_device, const char *device,
bool has_node_name, const char *node_name,
const char *password, Error **errp)
{
Error *local_err = NULL;
BlockDriverState *bs;
AioContext *aio_context;
bs = bdrv_lookup_bs(has_device ? device : NULL,
has_node_name ? node_name : NULL,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
bdrv_add_key(bs, password, errp);
aio_context_release(aio_context);
error_setg(errp,
"Setting block passwords directly is no longer supported");
}
/*
@@ -2587,12 +2575,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
goto fail;
}
bdrv_add_key(medium_bs, NULL, &err);
if (err) {
error_propagate(errp, err);
goto fail;
}
rc = do_open_tray(has_device ? device : NULL,
has_id ? id : NULL,
false, &err);
@@ -2727,10 +2709,12 @@ out:
void qmp_block_dirty_bitmap_add(const char *node, const char *name,
bool has_granularity, uint32_t granularity,
bool has_persistent, bool persistent,
bool has_autoload, bool autoload,
Error **errp)
{
AioContext *aio_context;
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
if (!name || name[0] == '\0') {
error_setg(errp, "Bitmap name cannot be empty");
@@ -2742,34 +2726,53 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
return;
}
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (has_granularity) {
if (granularity < 512 || !is_power_of_2(granularity)) {
error_setg(errp, "Granularity must be power of 2 "
"and at least 512");
goto out;
return;
}
} else {
/* Default to cluster size, if available: */
granularity = bdrv_get_default_bitmap_granularity(bs);
}
bdrv_create_dirty_bitmap(bs, granularity, name, errp);
if (!has_persistent) {
persistent = false;
}
if (!has_autoload) {
autoload = false;
}
out:
aio_context_release(aio_context);
if (has_autoload && !persistent) {
error_setg(errp, "Autoload flag must be used only for persistent "
"bitmaps");
return;
}
if (persistent &&
!bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
{
return;
}
bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
if (bitmap == NULL) {
return;
}
bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
bdrv_dirty_bitmap_set_autoload(bitmap, autoload);
}
void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
Error **errp)
{
AioContext *aio_context;
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
Error *local_err = NULL;
bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
if (!bitmap || !bs) {
return;
}
@@ -2778,13 +2781,19 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
error_setg(errp,
"Bitmap '%s' is currently frozen and cannot be removed",
name);
goto out;
return;
}
if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
return;
}
}
bdrv_dirty_bitmap_make_anon(bitmap);
bdrv_release_dirty_bitmap(bs, bitmap);
out:
aio_context_release(aio_context);
}
/**
@@ -2794,11 +2803,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
Error **errp)
{
AioContext *aio_context;
BdrvDirtyBitmap *bitmap;
BlockDriverState *bs;
bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
if (!bitmap || !bs) {
return;
}
@@ -2807,18 +2815,43 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
error_setg(errp,
"Bitmap '%s' is currently frozen and cannot be modified",
name);
goto out;
return;
} else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
error_setg(errp,
"Bitmap '%s' is currently disabled and cannot be cleared",
name);
goto out;
return;
} else if (bdrv_dirty_bitmap_readonly(bitmap)) {
error_setg(errp, "Bitmap '%s' is readonly and cannot be cleared", name);
return;
}
bdrv_clear_dirty_bitmap(bitmap, NULL);
}
out:
aio_context_release(aio_context);
/*
 * Look up the dirty bitmap @name on @node and return a newly allocated
 * BlockDirtyBitmapSha256 carrying the string produced by
 * bdrv_dirty_bitmap_sha256().
 *
 * Returns NULL when the lookup fails or when no checksum string is
 * produced; in both cases @errp is passed through to the failing helper.
 */
BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
                                                              const char *name,
                                                              Error **errp)
{
    BlockDriverState *bs;
    BdrvDirtyBitmap *bitmap;
    BlockDirtyBitmapSha256 *result;
    char *digest;

    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
    if (bitmap == NULL || bs == NULL) {
        return NULL;
    }

    digest = bdrv_dirty_bitmap_sha256(bitmap, errp);
    if (!digest) {
        return NULL;
    }

    /* The result object takes over the checksum string. */
    result = g_new(BlockDirtyBitmapSha256, 1);
    result->sha256 = digest;

    return result;
}
void hmp_drive_del(Monitor *mon, const QDict *qdict)
@@ -2924,7 +2957,7 @@ void qmp_block_resize(bool has_device, const char *device,
}
bdrv_drained_begin(bs);
ret = blk_truncate(blk, size, errp);
ret = blk_truncate(blk, size, PREALLOC_MODE_OFF, errp);
bdrv_drained_end(bs);
out:
@@ -3514,6 +3547,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
backing_mode = MIRROR_OPEN_BACKING_CHAIN;
}
/* Don't open backing image in create() */
flags |= BDRV_O_NO_BACKING;
if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source)
&& arg->mode != NEW_IMAGE_MODE_EXISTING)
{
@@ -3553,8 +3589,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
/* Mirroring takes care of copy-on-write using the source's backing
* file.
*/
target_bs = bdrv_open(arg->target, NULL, options,
flags | BDRV_O_NO_BACKING, errp);
target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
if (!target_bs) {
goto out;
}
@@ -3715,7 +3750,6 @@ void qmp_block_job_resume(const char *device, Error **errp)
}
trace_qmp_block_job_resume(job);
block_job_iostatus_reset(job);
block_job_user_resume(job);
aio_context_release(aio_context);
}
@@ -3878,13 +3912,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
if (bs && bdrv_key_required(bs)) {
QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
bdrv_unref(bs);
error_setg(errp, "blockdev-add doesn't support encrypted devices");
goto fail;
}
fail:
visit_free(v);
}

View File

@@ -55,35 +55,20 @@ struct BlockJobTxn {
static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
static char *child_job_get_parent_desc(BdrvChild *c)
{
BlockJob *job = c->opaque;
return g_strdup_printf("%s job '%s'",
BlockJobType_lookup[job->driver->job_type],
job->id);
}
static const BdrvChildRole child_job = {
.get_parent_desc = child_job_get_parent_desc,
.stay_at_node = true,
};
static void block_job_drained_begin(void *opaque)
{
BlockJob *job = opaque;
block_job_pause(job);
}
static void block_job_drained_end(void *opaque)
{
BlockJob *job = opaque;
block_job_resume(job);
}
static const BlockDevOps block_job_dev_ops = {
.drained_begin = block_job_drained_begin,
.drained_end = block_job_drained_end,
};
/*
* The block job API is composed of two categories of functions.
*
* The first includes functions used by the monitor. The monitor is
* peculiar in that it accesses the block job list with block_job_get, and
* therefore needs consistency across block_job_get and the actual operation
* (e.g. block_job_set_speed). The consistency is achieved with
* aio_context_acquire/release. These functions are declared in blockjob.h.
*
* The second includes functions used by the block job drivers and sometimes
* by the core block layer. These do not care about locking, because the
* whole coroutine runs under the AioContext lock, and are declared in
* blockjob_int.h.
*/
BlockJob *block_job_next(BlockJob *job)
{
@@ -106,6 +91,80 @@ BlockJob *block_job_get(const char *id)
return NULL;
}
/*
 * Allocate a zero-filled transaction with an empty job list.
 * The returned object starts with a reference count of one.
 */
BlockJobTxn *block_job_txn_new(void)
{
    BlockJobTxn *new_txn = g_new0(BlockJobTxn, 1);

    new_txn->refcnt = 1;
    QLIST_INIT(&new_txn->jobs);

    return new_txn;
}
/* Take one additional reference on @txn. */
static void block_job_txn_ref(BlockJobTxn *txn)
{
    txn->refcnt += 1;
}
/*
 * Drop one reference on @txn and free it once the count reaches zero.
 * A NULL @txn is accepted and ignored.
 */
void block_job_txn_unref(BlockJobTxn *txn)
{
    if (txn == NULL) {
        return;
    }

    txn->refcnt--;
    if (txn->refcnt == 0) {
        g_free(txn);
    }
}
/*
 * Attach @job to @txn: record the transaction in the job, put the job at
 * the head of the transaction's job list and take a reference on @txn.
 * Does nothing when @txn is NULL.  @job must not already have a
 * transaction.
 */
void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
{
    if (txn != NULL) {
        assert(!job->txn);

        job->txn = txn;
        QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
        block_job_txn_ref(txn);
    }
}
/* Record one more pause request on @job by bumping its pause counter. */
static void block_job_pause(BlockJob *job)
{
    job->pause_count += 1;
}
/*
 * Undo one pause request on @job.  The job is entered again only when
 * the last outstanding pause request has been dropped.
 */
static void block_job_resume(BlockJob *job)
{
    assert(job->pause_count > 0);

    job->pause_count -= 1;
    if (job->pause_count == 0) {
        block_job_enter(job);
    }
}
/* Take one additional reference on @job. */
void block_job_ref(BlockJob *job)
{
    job->refcnt += 1;
}
static void block_job_attached_aio_context(AioContext *new_context,
void *opaque);
static void block_job_detach_aio_context(void *opaque);
/*
 * Drop one reference on @job.  When the count reaches zero the job is torn
 * down: it is detached from its BlockDriverState, its nodes and AioContext
 * notifier are removed, its BlockBackend is unreferenced, and the job is
 * unlinked from the job list and freed.
 */
void block_job_unref(BlockJob *job)
{
    BlockDriverState *bs;

    if (--job->refcnt != 0) {
        return;
    }

    bs = blk_bs(job->blk);
    bs->job = NULL;

    block_job_remove_all_bdrv(job);
    blk_remove_aio_context_notifier(job->blk,
                                    block_job_attached_aio_context,
                                    block_job_detach_aio_context, job);
    blk_unref(job->blk);

    error_free(job->blocker);
    g_free(job->id);

    QLIST_REMOVE(job, job_list);
    g_free(job);
}
static void block_job_attached_aio_context(AioContext *new_context,
void *opaque)
{
@@ -145,6 +204,36 @@ static void block_job_detach_aio_context(void *opaque)
block_job_unref(job);
}
/*
 * Build a description of the job that owns child @c, formatted as
 * "<job type> job '<job id>'".  Returns a newly allocated string.
 */
static char *child_job_get_parent_desc(BdrvChild *c)
{
    BlockJob *owner = c->opaque;
    const char *type_name = BlockJobType_lookup[owner->driver->job_type];

    return g_strdup_printf("%s job '%s'", type_name, owner->id);
}
/*
 * Child role used for nodes referenced by a block job: the parent is
 * described through child_job_get_parent_desc() and stay_at_node is set.
 */
static const BdrvChildRole child_job = {
.get_parent_desc = child_job_get_parent_desc,
.stay_at_node = true,
};
/* Drain-begin hook: @opaque is the BlockJob to pause. */
static void block_job_drained_begin(void *opaque)
{
    BlockJob *drained_job = opaque;

    block_job_pause(drained_job);
}
/* Drain-end hook: @opaque is the BlockJob to resume. */
static void block_job_drained_end(void *opaque)
{
    BlockJob *drained_job = opaque;

    block_job_resume(drained_job);
}
/*
 * BlockDevOps table for block jobs: the drain hooks pause the job when a
 * drained section begins and resume it when the section ends.
 */
static const BlockDevOps block_job_dev_ops = {
.drained_begin = block_job_drained_begin,
.drained_end = block_job_drained_end,
};
void block_job_remove_all_bdrv(BlockJob *job)
{
GSList *l;
@@ -175,6 +264,350 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
return 0;
}
/* A job is internal when it has no ID. */
bool block_job_is_internal(BlockJob *job)
{
    return !job->id;
}
/* A job counts as started once its coroutine has been created. */
static bool block_job_started(BlockJob *job)
{
    return job->co != NULL;
}
/**
 * Coroutine entry point shared by all block jobs; @opaque is the BlockJob.
 *
 * All jobs must allow a pause point before entering their job proper. This
 * ensures that jobs can be paused prior to being started, then resumed later.
 */
static void coroutine_fn block_job_co_entry(void *opaque)
{
BlockJob *job = opaque;
assert(job && job->driver && job->driver->start);
/* Honour any pause request issued before the job got to run. */
block_job_pause_point(job);
job->driver->start(job);
}
/*
 * Start a job that has been created but not yet run.
 *
 * The job must be in the paused state and its driver must provide a start
 * callback.  Creates the job coroutine, drops one pause request, marks the
 * job busy and no longer paused, then enters the coroutine.
 */
void block_job_start(BlockJob *job)
{
    assert(job && !block_job_started(job) && job->paused &&
           job->driver && job->driver->start);

    job->co = qemu_coroutine_create(block_job_co_entry, job);

    /* Flip the bookkeeping to "running" before handing over control. */
    job->paused = false;
    job->busy = true;
    job->pause_count -= 1;

    bdrv_coroutine_enter(blk_bs(job->blk), job->co);
}
/*
 * Finalize one completed job.
 *
 * Runs the driver's commit hook (job->ret == 0) or abort hook (otherwise),
 * then the clean hook and the user completion callback.  A cancelled or
 * completed event is emitted only for jobs that were actually started.
 * Finally the job leaves its transaction, if any, and one job reference is
 * dropped.
 */
static void block_job_completed_single(BlockJob *job)
{
    assert(job->completed);

    if (job->ret) {
        if (job->driver->abort) {
            job->driver->abort(job);
        }
    } else if (job->driver->commit) {
        job->driver->commit(job);
    }

    if (job->driver->clean) {
        job->driver->clean(job);
    }

    if (job->cb) {
        job->cb(job->opaque, job->ret);
    }

    /* Emit events only if we actually started */
    if (block_job_started(job)) {
        if (block_job_is_cancelled(job)) {
            block_job_event_cancelled(job);
        } else {
            /* Only a negative return code carries an error message. */
            const char *msg = job->ret < 0 ? strerror(-job->ret) : NULL;

            block_job_event_completed(job, msg);
        }
    }

    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        block_job_txn_unref(job->txn);
    }

    block_job_unref(job);
}
/*
 * Mark @job as cancelled without waiting for it.
 *
 * Resets a non-OK I/O status and withdraws a pending user pause.  The job
 * is not re-entered here.
 */
static void block_job_cancel_async(BlockJob *job)
{
    if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
        block_job_iostatus_reset(job);
    }

    if (job->user_paused) {
        /* Do not call block_job_enter here, the caller will handle it. */
        job->pause_count--;
        job->user_paused = false;
    }

    job->cancelled = true;
}
/*
 * Run the optional @finish action on @job and wait until the job completes.
 *
 * @finish may be NULL, in which case the function only waits.
 *
 * Returns -EBUSY if @finish reported an error (propagated to @errp),
 * -ECANCELED if the job was cancelled and its return code is 0, and the
 * job's own return code otherwise.
 */
static int block_job_finish_sync(BlockJob *job,
void (*finish)(BlockJob *, Error **errp),
Error **errp)
{
Error *local_err = NULL;
int ret;
assert(blk_bs(job->blk)->job == job);
/* Hold a reference so job->cancelled and job->ret can be read after completion. */
block_job_ref(job);
if (finish) {
finish(job, &local_err);
}
if (local_err) {
error_propagate(errp, local_err);
block_job_unref(job);
return -EBUSY;
}
/* block_job_drain calls block_job_enter, and it should be enough to
* induce progress until the job completes or moves to the main thread.
*/
while (!job->deferred_to_main_loop && !job->completed) {
block_job_drain(job);
}
/* Poll the main AioContext until the job is marked completed. */
while (!job->completed) {
aio_poll(qemu_get_aio_context(), true);
}
ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
block_job_unref(job);
return ret;
}
/*
 * Abort the transaction that @job belongs to.
 *
 * The first job to get here marks the transaction as aborting, cancels all
 * other jobs in it, waits for those that have not completed yet, and
 * finalizes every job.  Later callers return immediately.
 */
static void block_job_completed_txn_abort(BlockJob *job)
{
AioContext *ctx;
BlockJobTxn *txn = job->txn;
BlockJob *other_job;
if (txn->aborting) {
/*
* We are cancelled by another job, which will handle everything.
*/
return;
}
txn->aborting = true;
/* Extra reference, dropped at the end of this function. */
block_job_txn_ref(txn);
/* We are the first failed job. Cancel other jobs. */
/* First acquire the AioContext of every job in the transaction. */
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
ctx = blk_get_aio_context(other_job->blk);
aio_context_acquire(ctx);
}
/* Other jobs are effectively cancelled by us, set the status for
* them; this job, however, may or may not be cancelled, depending
* on the caller, so leave it. */
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (other_job != job) {
block_job_cancel_async(other_job);
}
}
/*
 * block_job_completed_single() unlinks the job from txn->jobs, so keep
 * taking the list head until the list is empty.
 */
while (!QLIST_EMPTY(&txn->jobs)) {
other_job = QLIST_FIRST(&txn->jobs);
/* Fetch the context before the job is finalized. */
ctx = blk_get_aio_context(other_job->blk);
if (!other_job->completed) {
assert(other_job->cancelled);
block_job_finish_sync(other_job, NULL, NULL);
}
block_job_completed_single(other_job);
aio_context_release(ctx);
}
block_job_txn_unref(txn);
}
/*
 * Called when @job finished successfully as part of a transaction.
 *
 * Nothing happens while any job in the transaction is still incomplete.
 * Once every job has completed, each one is finalized under its own
 * AioContext.
 */
static void block_job_completed_txn_success(BlockJob *job)
{
AioContext *ctx;
BlockJobTxn *txn = job->txn;
BlockJob *other_job, *next;
/*
* Successful completion, see if there are other running jobs in this
* txn.
*/
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!other_job->completed) {
return;
}
}
/* We are the last completed job, commit the transaction. */
/* _SAFE variant: block_job_completed_single() unlinks the job from the list. */
QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
ctx = blk_get_aio_context(other_job->blk);
aio_context_acquire(ctx);
assert(other_job->ret == 0);
block_job_completed_single(other_job);
aio_context_release(ctx);
}
}
/*
 * Change the speed of @job.
 *
 * Sets @errp to QERR_UNSUPPORTED if the driver has no set_speed callback.
 * Otherwise the callback is invoked; job->speed is updated only if the
 * callback reports no error, else the error is propagated to @errp.
 */
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    Error *err = NULL;

    if (!job->driver->set_speed) {
        error_setg(errp, QERR_UNSUPPORTED);
        return;
    }

    job->driver->set_speed(job, speed, &err);
    if (!err) {
        job->speed = speed;
    } else {
        error_propagate(errp, err);
    }
}
/*
 * Ask @job to complete through the driver's complete callback.
 *
 * Fails with an error in @errp when the job is paused, cancelled, not yet
 * started, or its driver has no complete callback.
 */
void block_job_complete(BlockJob *job, Error **errp)
{
    bool can_complete;

    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);

    can_complete = !job->pause_count && !job->cancelled &&
                   block_job_started(job) && job->driver->complete;
    if (!can_complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    job->driver->complete(job, errp);
}
/* Pause @job on behalf of the user and remember that the user did so. */
void block_job_user_pause(BlockJob *job)
{
    job->user_paused = true;

    block_job_pause(job);
}
/* Report whether @job is currently paused by a user request. */
bool block_job_user_paused(BlockJob *job)
{
    bool paused_by_user = job->user_paused;

    return paused_by_user;
}
/*
 * Undo a user pause on @job.
 *
 * Does nothing when @job is NULL, was not paused by the user, or has no
 * outstanding pause request.  Otherwise the I/O status is reset, the user
 * pause flag is cleared and one pause request is dropped.
 */
void block_job_user_resume(BlockJob *job)
{
    if (!job || !job->user_paused || job->pause_count <= 0) {
        return;
    }

    block_job_iostatus_reset(job);
    job->user_paused = false;
    block_job_resume(job);
}
/*
 * Cancel @job.
 *
 * A job that never started is completed immediately with -ECANCELED.  A
 * started job is flagged as cancelled and re-entered.
 */
void block_job_cancel(BlockJob *job)
{
    if (!block_job_started(job)) {
        block_job_completed(job, -ECANCELED);
        return;
    }

    block_job_cancel_async(job);
    block_job_enter(job);
}
/*
 * Adapter giving block_job_cancel() the (BlockJob *, Error **) signature
 * expected by block_job_finish_sync(), so no function pointer cast is
 * needed there.  @errp is never set.
 */
static void block_job_cancel_err(BlockJob *job, Error **errp)
{
    (void)errp;

    block_job_cancel(job);
}
/*
 * Cancel @job and wait for it to finish.
 * Returns the result of block_job_finish_sync().
 */
int block_job_cancel_sync(BlockJob *job)
{
    int rc = block_job_finish_sync(job, block_job_cancel_err, NULL);

    return rc;
}
/*
 * Synchronously cancel every job in the global job list, always taking
 * the current list head, each under its own AioContext.
 */
void block_job_cancel_sync_all(void)
{
    for (;;) {
        BlockJob *head = QLIST_FIRST(&block_jobs);
        AioContext *ctx;

        if (!head) {
            break;
        }

        ctx = blk_get_aio_context(head->blk);
        aio_context_acquire(ctx);
        block_job_cancel_sync(head);
        aio_context_release(ctx);
    }
}
/*
 * Request completion of @job and wait for it to finish.
 * Returns the result of block_job_finish_sync(); errors go to @errp.
 */
int block_job_complete_sync(BlockJob *job, Error **errp)
{
    int rc = block_job_finish_sync(job, block_job_complete, errp);

    return rc;
}
/*
 * Build a BlockJobInfo snapshot of @job.
 *
 * Returns a newly allocated structure, or NULL with @errp set when @job is
 * an internal job.
 */
BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
{
    BlockJobInfo *snapshot;

    if (block_job_is_internal(job)) {
        error_setg(errp, "Cannot query QEMU internal jobs");
        return NULL;
    }

    snapshot = g_new0(BlockJobInfo, 1);

    /* Identity */
    snapshot->type = g_strdup(BlockJobType_lookup[job->driver->job_type]);
    snapshot->device = g_strdup(job->id);

    /* Progress */
    snapshot->len = job->len;
    snapshot->offset = job->offset;
    snapshot->speed = job->speed;

    /* State */
    snapshot->busy = job->busy;
    snapshot->paused = job->pause_count > 0;
    snapshot->io_status = job->iostatus;
    snapshot->ready = job->ready;

    return snapshot;
}
/*
 * Record an I/O error on @job, unless a non-OK status is already set.
 * ENOSPC maps to the NOSPACE status, any other error to FAILED.
 */
static void block_job_iostatus_set_err(BlockJob *job, int error)
{
    if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
        return;
    }

    if (error == ENOSPC) {
        job->iostatus = BLOCK_DEVICE_IO_STATUS_NOSPACE;
    } else {
        job->iostatus = BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
/* Emit the BLOCK_JOB_CANCELLED event for @job; internal jobs stay silent. */
static void block_job_event_cancelled(BlockJob *job)
{
    if (!block_job_is_internal(job)) {
        qapi_event_send_block_job_cancelled(job->driver->job_type,
                                            job->id,
                                            job->len,
                                            job->offset,
                                            job->speed,
                                            &error_abort);
    }
}
/*
 * Emit the BLOCK_JOB_COMPLETED event for @job; internal jobs stay silent.
 * @msg is the optional error message; NULL means none.
 */
static void block_job_event_completed(BlockJob *job, const char *msg)
{
    bool has_msg = msg != NULL;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_completed(job->driver->job_type,
                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
                                        has_msg,
                                        msg,
                                        &error_abort);
}
/*
* API for block job drivers and the block layer. These functions are
* declared in blockjob_int.h.
*/
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, int64_t speed, int flags,
@@ -259,163 +692,23 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
return job;
}
bool block_job_is_internal(BlockJob *job)
void block_job_pause_all(void)
{
return (job->id == NULL);
}
BlockJob *job = NULL;
while ((job = block_job_next(job))) {
AioContext *aio_context = blk_get_aio_context(job->blk);
static bool block_job_started(BlockJob *job)
{
return job->co;
}
/**
* All jobs must allow a pause point before entering their job proper. This
* ensures that jobs can be paused prior to being started, then resumed later.
*/
static void coroutine_fn block_job_co_entry(void *opaque)
{
BlockJob *job = opaque;
assert(job && job->driver && job->driver->start);
block_job_pause_point(job);
job->driver->start(job);
}
void block_job_start(BlockJob *job)
{
assert(job && !block_job_started(job) && job->paused &&
job->driver && job->driver->start);
job->co = qemu_coroutine_create(block_job_co_entry, job);
job->pause_count--;
job->busy = true;
job->paused = false;
bdrv_coroutine_enter(blk_bs(job->blk), job->co);
}
void block_job_ref(BlockJob *job)
{
++job->refcnt;
}
void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
BlockDriverState *bs = blk_bs(job->blk);
bs->job = NULL;
block_job_remove_all_bdrv(job);
blk_remove_aio_context_notifier(job->blk,
block_job_attached_aio_context,
block_job_detach_aio_context, job);
blk_unref(job->blk);
error_free(job->blocker);
g_free(job->id);
QLIST_REMOVE(job, job_list);
g_free(job);
aio_context_acquire(aio_context);
block_job_pause(job);
aio_context_release(aio_context);
}
}
static void block_job_completed_single(BlockJob *job)
void block_job_early_fail(BlockJob *job)
{
if (!job->ret) {
if (job->driver->commit) {
job->driver->commit(job);
}
} else {
if (job->driver->abort) {
job->driver->abort(job);
}
}
if (job->driver->clean) {
job->driver->clean(job);
}
if (job->cb) {
job->cb(job->opaque, job->ret);
}
/* Emit events only if we actually started */
if (block_job_started(job)) {
if (block_job_is_cancelled(job)) {
block_job_event_cancelled(job);
} else {
const char *msg = NULL;
if (job->ret < 0) {
msg = strerror(-job->ret);
}
block_job_event_completed(job, msg);
}
}
if (job->txn) {
QLIST_REMOVE(job, txn_list);
block_job_txn_unref(job->txn);
}
block_job_unref(job);
}
static void block_job_completed_txn_abort(BlockJob *job)
{
AioContext *ctx;
BlockJobTxn *txn = job->txn;
BlockJob *other_job, *next;
if (txn->aborting) {
/*
* We are cancelled by another job, which will handle everything.
*/
return;
}
txn->aborting = true;
/* We are the first failed job. Cancel other jobs. */
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
ctx = blk_get_aio_context(other_job->blk);
aio_context_acquire(ctx);
}
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (other_job == job || other_job->completed) {
/* Other jobs are "effectively" cancelled by us, set the status for
* them; this job, however, may or may not be cancelled, depending
* on the caller, so leave it. */
if (other_job != job) {
other_job->cancelled = true;
}
continue;
}
block_job_cancel_sync(other_job);
assert(other_job->completed);
}
QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
ctx = blk_get_aio_context(other_job->blk);
block_job_completed_single(other_job);
aio_context_release(ctx);
}
}
static void block_job_completed_txn_success(BlockJob *job)
{
AioContext *ctx;
BlockJobTxn *txn = job->txn;
BlockJob *other_job, *next;
/*
* Successful completion, see if there are other running jobs in this
* txn.
*/
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (!other_job->completed) {
return;
}
}
/* We are the last completed job, commit the transaction. */
QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
ctx = blk_get_aio_context(other_job->blk);
aio_context_acquire(ctx);
assert(other_job->ret == 0);
block_job_completed_single(other_job);
aio_context_release(ctx);
}
}
void block_job_completed(BlockJob *job, int ret)
{
assert(blk_bs(job->blk)->job == job);
@@ -431,58 +724,11 @@ void block_job_completed(BlockJob *job, int ret)
}
}
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
Error *local_err = NULL;
if (!job->driver->set_speed) {
error_setg(errp, QERR_UNSUPPORTED);
return;
}
job->driver->set_speed(job, speed, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
job->speed = speed;
}
void block_job_complete(BlockJob *job, Error **errp)
{
/* Should not be reachable via external interface for internal jobs */
assert(job->id);
if (job->pause_count || job->cancelled ||
!block_job_started(job) || !job->driver->complete) {
error_setg(errp, "The active block job '%s' cannot be completed",
job->id);
return;
}
job->driver->complete(job, errp);
}
void block_job_pause(BlockJob *job)
{
job->pause_count++;
}
void block_job_user_pause(BlockJob *job)
{
job->user_paused = true;
block_job_pause(job);
}
static bool block_job_should_pause(BlockJob *job)
{
return job->pause_count > 0;
}
bool block_job_user_paused(BlockJob *job)
{
return job ? job->user_paused : 0;
}
void coroutine_fn block_job_pause_point(BlockJob *job)
{
assert(job && block_job_started(job));
@@ -511,39 +757,29 @@ void coroutine_fn block_job_pause_point(BlockJob *job)
}
}
void block_job_resume(BlockJob *job)
void block_job_resume_all(void)
{
assert(job->pause_count > 0);
job->pause_count--;
if (job->pause_count) {
return;
}
block_job_enter(job);
}
BlockJob *job = NULL;
while ((job = block_job_next(job))) {
AioContext *aio_context = blk_get_aio_context(job->blk);
void block_job_user_resume(BlockJob *job)
{
if (job && job->user_paused && job->pause_count > 0) {
job->user_paused = false;
aio_context_acquire(aio_context);
block_job_resume(job);
aio_context_release(aio_context);
}
}
void block_job_enter(BlockJob *job)
{
if (job->co && !job->busy) {
bdrv_coroutine_enter(blk_bs(job->blk), job->co);
if (!block_job_started(job)) {
return;
}
if (job->deferred_to_main_loop) {
return;
}
}
void block_job_cancel(BlockJob *job)
{
if (block_job_started(job)) {
job->cancelled = true;
block_job_iostatus_reset(job);
block_job_enter(job);
} else {
block_job_completed(job, -ECANCELED);
if (!job->busy) {
bdrv_coroutine_enter(blk_bs(job->blk), job->co);
}
}
@@ -552,76 +788,6 @@ bool block_job_is_cancelled(BlockJob *job)
return job->cancelled;
}
void block_job_iostatus_reset(BlockJob *job)
{
job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
if (job->driver->iostatus_reset) {
job->driver->iostatus_reset(job);
}
}
static int block_job_finish_sync(BlockJob *job,
void (*finish)(BlockJob *, Error **errp),
Error **errp)
{
Error *local_err = NULL;
int ret;
assert(blk_bs(job->blk)->job == job);
block_job_ref(job);
finish(job, &local_err);
if (local_err) {
error_propagate(errp, local_err);
block_job_unref(job);
return -EBUSY;
}
/* block_job_drain calls block_job_enter, and it should be enough to
* induce progress until the job completes or moves to the main thread.
*/
while (!job->deferred_to_main_loop && !job->completed) {
block_job_drain(job);
}
while (!job->completed) {
aio_poll(qemu_get_aio_context(), true);
}
ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
block_job_unref(job);
return ret;
}
/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
* used with block_job_finish_sync() without the need for (rather nasty)
* function pointer casts there. */
static void block_job_cancel_err(BlockJob *job, Error **errp)
{
block_job_cancel(job);
}
int block_job_cancel_sync(BlockJob *job)
{
return block_job_finish_sync(job, &block_job_cancel_err, NULL);
}
void block_job_cancel_sync_all(void)
{
BlockJob *job;
AioContext *aio_context;
while ((job = QLIST_FIRST(&block_jobs))) {
aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
block_job_cancel_sync(job);
aio_context_release(aio_context);
}
}
int block_job_complete_sync(BlockJob *job, Error **errp)
{
return block_job_finish_sync(job, &block_job_complete, errp);
}
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
{
assert(job->busy);
@@ -658,63 +824,13 @@ void block_job_yield(BlockJob *job)
block_job_pause_point(job);
}
BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
{
BlockJobInfo *info;
if (block_job_is_internal(job)) {
error_setg(errp, "Cannot query QEMU internal jobs");
return NULL;
}
info = g_new0(BlockJobInfo, 1);
info->type = g_strdup(BlockJobType_lookup[job->driver->job_type]);
info->device = g_strdup(job->id);
info->len = job->len;
info->busy = job->busy;
info->paused = job->pause_count > 0;
info->offset = job->offset;
info->speed = job->speed;
info->io_status = job->iostatus;
info->ready = job->ready;
return info;
}
static void block_job_iostatus_set_err(BlockJob *job, int error)
void block_job_iostatus_reset(BlockJob *job)
{
if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
BLOCK_DEVICE_IO_STATUS_FAILED;
}
}
static void block_job_event_cancelled(BlockJob *job)
{
if (block_job_is_internal(job)) {
return;
}
qapi_event_send_block_job_cancelled(job->driver->job_type,
job->id,
job->len,
job->offset,
job->speed,
&error_abort);
}
static void block_job_event_completed(BlockJob *job, const char *msg)
{
if (block_job_is_internal(job)) {
return;
}
qapi_event_send_block_job_completed(job->driver->job_type,
job->id,
job->len,
job->offset,
job->speed,
!!msg,
msg,
&error_abort);
assert(job->user_paused && job->pause_count > 0);
job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
void block_job_event_ready(BlockJob *job)
@@ -790,7 +906,6 @@ static void block_job_defer_to_main_loop_bh(void *opaque)
aio_context_acquire(aio_context);
}
data->job->deferred_to_main_loop = false;
data->fn(data->job, data->opaque);
if (aio_context != data->aio_context) {
@@ -816,36 +931,3 @@ void block_job_defer_to_main_loop(BlockJob *job,
aio_bh_schedule_oneshot(qemu_get_aio_context(),
block_job_defer_to_main_loop_bh, data);
}
BlockJobTxn *block_job_txn_new(void)
{
BlockJobTxn *txn = g_new0(BlockJobTxn, 1);
QLIST_INIT(&txn->jobs);
txn->refcnt = 1;
return txn;
}
static void block_job_txn_ref(BlockJobTxn *txn)
{
txn->refcnt++;
}
void block_job_txn_unref(BlockJobTxn *txn)
{
if (txn && --txn->refcnt == 0) {
g_free(txn);
}
}
void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
{
if (!txn) {
return;
}
assert(!job->txn);
job->txn = txn;
QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
block_job_txn_ref(txn);
}

View File

@@ -27,7 +27,7 @@
#include "sysemu/sysemu.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "hw/hw.h"
#include "sysemu/reset.h"
#include "hw/qdev-core.h"
typedef struct FWBootEntry FWBootEntry;

View File

@@ -25,7 +25,6 @@
#include "qemu/config-file.h"
#include "qemu/path.h"
#include "qemu/help_option.h"
/* For tb_lock */
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg.h"

View File

@@ -85,6 +85,8 @@ struct emulated_sigtable {
/* NOTE: we force a big alignment so that the stack stored after is
aligned too */
typedef struct TaskState {
pid_t ts_tid; /* tid (or pid) of this task */
struct TaskState *next;
int used; /* non zero if used */
struct image_info *info;

View File

@@ -1,6 +1,7 @@
chardev-obj-y += char.o
chardev-obj-$(CONFIG_WIN32) += char-console.o
chardev-obj-$(CONFIG_POSIX) += char-fd.o
chardev-obj-y += char-fe.o
chardev-obj-y += char-file.o
chardev-obj-y += char-io.o
chardev-obj-y += char-mux.o
@@ -15,3 +16,9 @@ chardev-obj-y += char-stdio.o
chardev-obj-y += char-udp.o
chardev-obj-$(CONFIG_WIN32) += char-win.o
chardev-obj-$(CONFIG_WIN32) += char-win-stdio.o
common-obj-y += msmouse.o wctablet.o testdev.o
common-obj-$(CONFIG_BRLAPI) += baum.o
baum.o-cflags := $(SDL_CFLAGS)
common-obj-$(CONFIG_SPICE) += spice.o

View File

@@ -24,7 +24,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "chardev/char.h"
#include "qemu/timer.h"
#include "hw/usb.h"
#include "ui/console.h"

View File

@@ -22,14 +22,14 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "char-win.h"
#include "chardev/char-win.h"
static void qemu_chr_open_win_con(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
qemu_chr_open_win_file(chr, GetStdHandle(STD_OUTPUT_HANDLE));
win_chr_set_file(chr, GetStdHandle(STD_OUTPUT_HANDLE), true);
}
static void char_console_class_init(ObjectClass *oc, void *data)

View File

@@ -25,11 +25,11 @@
#include "qemu/sockets.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "chardev/char.h"
#include "io/channel-file.h"
#include "char-fd.h"
#include "char-io.h"
#include "chardev/char-fd.h"
#include "chardev/char-io.h"
/* Called with chr_write_lock held. */
static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)

375
chardev/char-fe.c Normal file
View File

@@ -0,0 +1,375 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi-visit.h"
#include "sysemu/replay.h"
#include "chardev/char-fe.h"
#include "chardev/char-io.h"
#include "chardev/char-mux.h"
/*
 * Write @len bytes from @buf to the backend's chardev, passing false as
 * the write_all argument of qemu_chr_write().
 * Returns 0 when no chardev is attached, else qemu_chr_write()'s result.
 */
int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len)
{
    Chardev *chr = be->chr;

    return chr ? qemu_chr_write(chr, buf, len, false) : 0;
}
/*
 * Write @len bytes from @buf to the backend's chardev, passing true as
 * the write_all argument of qemu_chr_write().
 * Returns 0 when no chardev is attached, else qemu_chr_write()'s result.
 */
int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
{
    Chardev *chr = be->chr;

    return chr ? qemu_chr_write(chr, buf, len, true) : 0;
}
/*
 * Synchronously read up to @len bytes into @buf.
 *
 * Returns 0 when no chardev is attached or it has no chr_sync_read
 * callback.  In replay-play mode the data comes from the replay log.
 * Otherwise the callback is invoked until @len bytes are read, it returns
 * 0, it fails, or the round budget is used up.  EAGAIN is retried after a
 * short sleep and does not consume a round.  Returns the number of bytes
 * read or the negative result of the failed read.
 */
int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len)
{
    Chardev *chr = be->chr;
    int done = 0;
    int rounds_left = 10;

    if (!chr || !CHARDEV_GET_CLASS(chr)->chr_sync_read) {
        return 0;
    }

    if (qemu_chr_replay(chr) && replay_mode == REPLAY_MODE_PLAY) {
        return replay_char_read_all_load(buf);
    }

    while (done < len) {
        int got = CHARDEV_GET_CLASS(chr)->chr_sync_read(chr, buf + done,
                                                        len - done);

        if (got == -1 && errno == EAGAIN) {
            /* Nothing available yet: back off briefly and try again. */
            g_usleep(100);
            continue;
        }

        if (got == 0) {
            break;
        }

        if (got < 0) {
            if (qemu_chr_replay(chr) && replay_mode == REPLAY_MODE_RECORD) {
                replay_char_read_all_save_error(got);
            }
            return got;
        }

        done += got;

        if (rounds_left-- == 0) {
            break;
        }
    }

    if (qemu_chr_replay(chr) && replay_mode == REPLAY_MODE_RECORD) {
        replay_char_read_all_save_buf(buf, done);
    }
    return done;
}
/*
 * Forward an ioctl to the backend's chardev.
 *
 * Returns -ENOTSUP when no chardev is attached, the chardev has no
 * chr_ioctl callback, or the chardev is under replay.
 */
int qemu_chr_fe_ioctl(CharBackend *be, int cmd, void *arg)
{
    Chardev *chr = be->chr;

    if (!chr || !CHARDEV_GET_CLASS(chr)->chr_ioctl || qemu_chr_replay(chr)) {
        return -ENOTSUP;
    }

    return CHARDEV_GET_CLASS(chr)->chr_ioctl(chr, cmd, arg);
}
/*
 * Fetch a single file descriptor from the backend.
 *
 * Returns the descriptor, or -1 if exactly one could not be obtained.
 * Reports an error and exits the process when the chardev is under replay.
 */
int qemu_chr_fe_get_msgfd(CharBackend *be)
{
    Chardev *chr = be->chr;
    int fd;
    int result;

    if (qemu_chr_fe_get_msgfds(be, &fd, 1) == 1) {
        result = fd;
    } else {
        result = -1;
    }

    if (chr && qemu_chr_replay(chr)) {
        error_report("Replay: get msgfd is not supported "
                     "for serial devices yet");
        exit(1);
    }

    return result;
}
/*
 * Ask the chardev's get_msgfds callback for up to @len descriptors in @fds.
 * Returns -1 when no chardev is attached or the callback is missing.
 */
int qemu_chr_fe_get_msgfds(CharBackend *be, int *fds, int len)
{
    Chardev *chr = be->chr;

    if (!chr) {
        return -1;
    }

    if (!CHARDEV_GET_CLASS(chr)->get_msgfds) {
        return -1;
    }

    return CHARDEV_GET_CLASS(chr)->get_msgfds(chr, fds, len);
}
/*
 * Hand @num descriptors from @fds to the chardev's set_msgfds callback.
 * Returns -1 when no chardev is attached or the callback is missing.
 */
int qemu_chr_fe_set_msgfds(CharBackend *be, int *fds, int num)
{
    Chardev *chr = be->chr;

    if (!chr) {
        return -1;
    }

    if (!CHARDEV_GET_CLASS(chr)->set_msgfds) {
        return -1;
    }

    return CHARDEV_GET_CLASS(chr)->set_msgfds(chr, fds, num);
}
/*
 * Invoke the chardev's chr_accept_input callback, if it has one, and then
 * call qemu_notify_event().  Does nothing when no chardev is attached.
 */
void qemu_chr_fe_accept_input(CharBackend *be)
{
    Chardev *chr = be->chr;

    if (chr) {
        if (CHARDEV_GET_CLASS(chr)->chr_accept_input) {
            CHARDEV_GET_CLASS(chr)->chr_accept_input(chr);
        }
        qemu_notify_event();
    }
}
/*
 * printf-style output to the backend.  The formatted text is limited to
 * CHR_READ_BUF_LEN bytes including the terminating NUL, then written with
 * qemu_chr_fe_write_all().
 */
void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...)
{
    char text[CHR_READ_BUF_LEN];
    va_list args;

    va_start(args, fmt);
    vsnprintf(text, sizeof(text), fmt, args);
    va_end(args);

    /* XXX this blocks entire thread. Rewrite to use
     * qemu_chr_fe_write and background I/O callbacks */
    qemu_chr_fe_write_all(be, (uint8_t *)text, strlen(text));
}
/*
 * Return the chardev attached to @be, which may be NULL.
 * Asserts that the frontend has no backend-change handler installed.
 */
Chardev *qemu_chr_fe_get_driver(CharBackend *be)
{
    Chardev *chr;

    /* this is unsafe for the users that support chardev hotswap */
    assert(be->chr_be_change == NULL);

    chr = be->chr;
    return chr;
}
/* True when a chardev is attached to @be. */
bool qemu_chr_fe_backend_connected(CharBackend *be)
{
    return be->chr != NULL;
}
/* True when a chardev is attached to @be and its be_open flag is set. */
bool qemu_chr_fe_backend_open(CharBackend *be)
{
    if (!be->chr) {
        return false;
    }

    return be->chr->be_open;
}
/*
 * Bind frontend @b to chardev @s.
 *
 * A mux chardev accepts up to MAX_MUX frontends, and the frontend's tag is
 * its slot index.  Any other chardev accepts a single frontend.
 *
 * @s may be NULL: the frontend is then initialized with no chardev
 * attached, the state the other qemu_chr_fe_* functions in this file
 * already accept through their "be->chr == NULL" checks.  Previously a
 * NULL @s was dereferenced here.
 *
 * Returns true on success.  Returns false with @errp set to
 * QERR_DEVICE_IN_USE if the chardev cannot take another frontend.
 */
bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error **errp)
{
    int tag = 0;

    if (s) {
        if (CHARDEV_IS_MUX(s)) {
            MuxChardev *d = MUX_CHARDEV(s);

            if (d->mux_cnt >= MAX_MUX) {
                goto unavailable;
            }

            d->backends[d->mux_cnt] = b;
            tag = d->mux_cnt++;
        } else if (s->be) {
            goto unavailable;
        } else {
            s->be = b;
        }
    }

    b->fe_open = false;
    b->tag = tag;
    b->chr = s;
    return true;

unavailable:
    /* Only reachable with a non-NULL @s, so s->label is safe to read. */
    error_setg(errp, QERR_DEVICE_IN_USE, s->label);
    return false;
}
/*
 * Detach frontend @b from its chardev, if any.
 *
 * Clears the frontend's handlers, removes the chardev's back-pointers to
 * @b (including the mux slot for mux chardevs), and unparents the chardev
 * object when @del is true.
 */
void qemu_chr_fe_deinit(CharBackend *b, bool del)
{
    assert(b);

    if (!b->chr) {
        return;
    }

    qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, NULL, true);

    if (b->chr->be == b) {
        b->chr->be = NULL;
    }

    if (CHARDEV_IS_MUX(b->chr)) {
        MuxChardev *mux = MUX_CHARDEV(b->chr);

        mux->backends[b->tag] = NULL;
    }

    if (del) {
        object_unparent(OBJECT(b->chr));
    }

    b->chr = NULL;
}
/*
 * Install or clear the frontend callbacks of @b.
 *
 * @fd_can_read, @fd_read, @fd_event and @be_change are stored in @b along
 * with @opaque.  The frontend counts as closed when @opaque and the three
 * fd_* handlers are all NULL (@be_change is not part of that test), and as
 * open otherwise.  @context is passed to the chardev's
 * chr_update_read_handler callback and, for mux chardevs, to
 * mux_chr_set_handlers().  The open state is pushed to the chardev only
 * when @set_open is true.
 *
 * Does nothing when no chardev is attached.
 */
void qemu_chr_fe_set_handlers(CharBackend *b,
IOCanReadHandler *fd_can_read,
IOReadHandler *fd_read,
IOEventHandler *fd_event,
BackendChangeHandler *be_change,
void *opaque,
GMainContext *context,
bool set_open)
{
Chardev *s;
ChardevClass *cc;
int fe_open;
s = b->chr;
if (!s) {
return;
}
cc = CHARDEV_GET_CLASS(s);
if (!opaque && !fd_can_read && !fd_read && !fd_event) {
/* Clearing all handlers: the frontend is closing, drop the input watch. */
fe_open = 0;
remove_fd_in_watch(s);
} else {
fe_open = 1;
}
b->chr_can_read = fd_can_read;
b->chr_read = fd_read;
b->chr_event = fd_event;
b->chr_be_change = be_change;
b->opaque = opaque;
if (cc->chr_update_read_handler) {
cc->chr_update_read_handler(s, context);
}
if (set_open) {
qemu_chr_fe_set_open(b, fe_open);
}
if (fe_open) {
qemu_chr_fe_take_focus(b);
/* We're connecting to an already opened device, so let's make sure we
also get the open event */
if (s->be_open) {
qemu_chr_be_event(s, CHR_EVENT_OPENED);
}
}
if (CHARDEV_IS_MUX(s)) {
mux_chr_set_handlers(s, context);
}
}
/*
 * Give @b the focus of its mux chardev.  Does nothing when no chardev is
 * attached or the chardev is not a mux.
 */
void qemu_chr_fe_take_focus(CharBackend *b)
{
    if (b->chr && CHARDEV_IS_MUX(b->chr)) {
        mux_set_focus(b->chr, b->tag);
    }
}
/*
 * Wait for the backend's chardev to be connected.
 * Returns -1 with @errp set when no chardev is attached, otherwise the
 * result of qemu_chr_wait_connected().
 */
int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp)
{
    Chardev *chr = be->chr;

    if (chr) {
        return qemu_chr_wait_connected(chr, errp);
    }

    error_setg(errp, "missing associated backend");
    return -1;
}
/*
 * Pass @echo to the chardev's chr_set_echo callback.  Does nothing when no
 * chardev is attached or the callback is missing.
 */
void qemu_chr_fe_set_echo(CharBackend *be, bool echo)
{
    Chardev *dev = be->chr;

    if (!dev) {
        return;
    }

    if (CHARDEV_GET_CLASS(dev)->chr_set_echo) {
        CHARDEV_GET_CLASS(dev)->chr_set_echo(dev, echo);
    }
}
/*
 * Record the frontend's open state in @be and notify the chardev through
 * its chr_set_fe_open callback, if any.  Does nothing when no chardev is
 * attached or the state is unchanged.
 */
void qemu_chr_fe_set_open(CharBackend *be, int fe_open)
{
    Chardev *dev = be->chr;

    if (!dev || be->fe_open == fe_open) {
        return;
    }

    be->fe_open = fe_open;

    if (CHARDEV_GET_CLASS(dev)->chr_set_fe_open) {
        CHARDEV_GET_CLASS(dev)->chr_set_fe_open(dev, fe_open);
    }
}
/*
 * Create a watch source for @cond through the chardev's chr_add_watch
 * callback, set @func/@user_data as its callback and attach it with a NULL
 * GMainContext.
 *
 * Returns the tag of the attached source, or 0 when no chardev is
 * attached, the callback is missing, or no source was created.
 */
guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond,
                            GIOFunc func, void *user_data)
{
    Chardev *chr = be->chr;
    GSource *watch;
    guint id;

    if (!chr) {
        return 0;
    }
    if (CHARDEV_GET_CLASS(chr)->chr_add_watch == NULL) {
        return 0;
    }

    watch = CHARDEV_GET_CLASS(chr)->chr_add_watch(chr, cond);
    if (!watch) {
        return 0;
    }

    g_source_set_callback(watch, (GSourceFunc)func, user_data, NULL);
    id = g_source_attach(watch, NULL);
    /* Drop our reference now that the source is attached. */
    g_source_unref(watch);

    return id;
}
/*
 * Invoke the chardev's chr_disconnect callback.  Does nothing when no
 * chardev is attached or the callback is missing.
 */
void qemu_chr_fe_disconnect(CharBackend *be)
{
    Chardev *dev = be->chr;

    if (!dev) {
        return;
    }

    if (CHARDEV_GET_CLASS(dev)->chr_disconnect) {
        CHARDEV_GET_CLASS(dev)->chr_disconnect(dev);
    }
}

View File

@@ -24,12 +24,12 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "chardev/char.h"
#ifdef _WIN32
#include "char-win.h"
#include "chardev/char-win.h"
#else
#include "char-fd.h"
#include "chardev/char-fd.h"
#endif
static void qmp_chardev_open_file(Chardev *chr,
@@ -65,7 +65,7 @@ static void qmp_chardev_open_file(Chardev *chr,
return;
}
qemu_chr_open_win_file(chr, out);
win_chr_set_file(chr, out, false);
#else
int flags, in = -1, out;

View File

@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "char-io.h"
#include "chardev/char-io.h"
typedef struct IOWatchPoll {
GSource parent;

View File

@@ -24,9 +24,9 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "chardev/char.h"
#include "sysemu/block-backend.h"
#include "char-mux.h"
#include "chardev/char-mux.h"
/* MUX driver for serial I/O splitting */
@@ -266,7 +266,7 @@ static void char_mux_finalize(Object *obj)
be->chr = NULL;
}
}
qemu_chr_fe_deinit(&d->chr);
qemu_chr_fe_deinit(&d->chr, false);
}
void mux_chr_set_handlers(Chardev *chr, GMainContext *context)
@@ -278,6 +278,7 @@ void mux_chr_set_handlers(Chardev *chr, GMainContext *context)
mux_chr_can_read,
mux_chr_read,
mux_chr_event,
NULL,
chr,
context, true);
}

View File

@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "sysemu/char.h"
#include "chardev/char.h"
static void null_chr_open(Chardev *chr,
ChardevBackend *backend,

View File

@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "sysemu/char.h"
#include "chardev/char.h"
#include "qapi/error.h"
#include <sys/ioctl.h>
@@ -41,8 +41,8 @@
#endif
#endif
#include "char-fd.h"
#include "char-parallel.h"
#include "chardev/char-fd.h"
#include "chardev/char-parallel.h"
#if defined(__linux__)

View File

@@ -23,12 +23,12 @@
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/char.h"
#include "chardev/char.h"
#ifdef _WIN32
#include "char-win.h"
#include "chardev/char-win.h"
#else
#include "char-fd.h"
#include "chardev/char-fd.h"
#endif
#ifdef _WIN32
@@ -58,27 +58,27 @@ static int win_chr_pipe_init(Chardev *chr, const char *filename,
}
openname = g_strdup_printf("\\\\.\\pipe\\%s", filename);
s->hcom = CreateNamedPipe(openname,
s->file = CreateNamedPipe(openname,
PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_BYTE | PIPE_READMODE_BYTE |
PIPE_WAIT,
MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL);
g_free(openname);
if (s->hcom == INVALID_HANDLE_VALUE) {
if (s->file == INVALID_HANDLE_VALUE) {
error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError());
s->hcom = NULL;
s->file = NULL;
goto fail;
}
ZeroMemory(&ov, sizeof(ov));
ov.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
ret = ConnectNamedPipe(s->hcom, &ov);
ret = ConnectNamedPipe(s->file, &ov);
if (ret) {
error_setg(errp, "Failed ConnectNamedPipe");
goto fail;
}
ret = GetOverlappedResult(s->hcom, &ov, &size, TRUE);
ret = GetOverlappedResult(s->file, &ov, &size, TRUE);
if (!ret) {
error_setg(errp, "Failed GetOverlappedResult");
if (ov.hEvent) {

View File

@@ -24,12 +24,12 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "chardev/char.h"
#include "io/channel-file.h"
#include "qemu/sockets.h"
#include "qemu/error-report.h"
#include "char-io.h"
#include "chardev/char-io.h"
#if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
|| defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \

Some files were not shown because too many files have changed in this diff Show More