Compare commits

...

422 Commits

Author SHA1 Message Date
Prasad J Pandit
dff0367cf6 usb: ehci: add capability mmio write function
USB Ehci emulation supports host controller capability registers.
But its mmio '.write' function was missing, which lead to a null
pointer dereference issue. Add a do nothing 'ehci_caps_write'
definition to avoid it; Do nothing because capability registers
are Read Only(RO).

Reported-by: Zuozhi Fzz <zuozhi.fzz@alibaba-inc.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 1454072434-16045-1-git-send-email-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-18 14:20:39 +01:00
Matthew Fortune
983bff3530 hw/usb/dev-mtp: Guard inotify usage with CONFIG_INOTIFY1
inotify_init1 usage was guarded by a check for linux but does not
exist on older distributions like CentOS 5 resulting in build
failures.

Signed-off-by: Matthew Fortune <matthew.fortune@imgtec.com>
Message-id: 6D39441BF12EF246A7ABCE6654B023536BB85D4A@hhmail02.hh.imgtec.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-18 13:58:15 +01:00
Peter Xu
f34d57d359 usb: fix unbound stack warning for inotify_watchfn
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1457503640-31473-1-git-send-email-peterx@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-18 13:56:24 +01:00
Peter Xu
e3d60bc7c6 usb: fix unbound stack usage for usb_mtp_add_str
Use heap instead of stack.

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-18 13:55:16 +01:00
Peter Xu
182b391e79 usb: fix unbounded stack warning for xhci_dma_write_u32s
All the callers for xhci_dma_write_u32s() are using mostly 5 * uint32_t
in len. To avoid unbound stack warning for the function, make it
statically allocated, and assert when it's not big enough in the
future.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-id: 1457661106-9569-1-git-send-email-peterx@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-18 13:42:14 +01:00
Stefan Weil
0ab6d12ffd usb: Fix compilation for Windows
Mingw-w64 does not provide sys/ioctl.h and Linux builds don't need it,
so remove that include statement.

ERROR is defined by wingdi.h (included via windows.h). Undefine it before
it is redefined to avoid a compiler warning / error.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Message-id: 1458159439-32322-1-git-send-email-sw@weilnetz.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-18 13:13:30 +01:00
Peter Maydell
6741d38ad0 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Thu 17 Mar 2016 15:49:29 GMT using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (29 commits)
  iotests: Test QUORUM_REPORT_BAD in fifo mode
  quorum: Emit QUORUM_REPORT_BAD for reads in fifo mode
  block: Use blk_co_pwritev() in blk_co_write_zeroes()
  block: Use blk_aio_prwv() for aio_read/write/write_zeroes
  block: Use blk_prw() in blk_pread()/blk_pwrite()
  block: Use blk_co_pwritev() in blk_write_zeroes()
  block: Pull up blk_read_unthrottled() implementation
  block: Use blk_co_pwritev() for blk_write()
  block: Use blk_co_preadv() for blk_read()
  block: Use BdrvChild in BlockBackend
  block: Remove bdrv_states list
  block: Use bdrv_next() instead of bdrv_states
  block: Rewrite bdrv_next()
  block: Add blk_next_root_bs()
  block: Add bdrv_next_monitor_owned()
  block: Move some bdrv_*_all() functions to BB
  blockdev: Remove blk_hide_on_behalf_of_hmp_drive_del()
  blockdev: Split monitor reference from BB creation
  blockdev: Separate BB name management
  blockdev: Add list of all BlockBackends
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-17 15:59:42 +00:00
Kevin Wolf
361dca7a5a Merge remote-tracking branch 'mreitz/tags/pull-block-for-kevin-2016-03-17-v2' into queue-block
Two quorum patches for the block queue, v2.

# gpg: Signature made Thu Mar 17 16:44:11 2016 CET using RSA key ID E838ACAD
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"

* mreitz/tags/pull-block-for-kevin-2016-03-17-v2:
  iotests: Test QUORUM_REPORT_BAD in fifo mode
  quorum: Emit QUORUM_REPORT_BAD for reads in fifo mode

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 16:48:49 +01:00
Alberto Garcia
509565f36f iotests: Test QUORUM_REPORT_BAD in fifo mode
Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: c0a8dbfdbe939520cda5f661af6f1cd7b6b4df9d.1458034554.git.berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-17 16:43:30 +01:00
Alberto Garcia
6049490df4 quorum: Emit QUORUM_REPORT_BAD for reads in fifo mode
If there's an I/O error in one of Quorum children then QEMU
should emit QUORUM_REPORT_BAD. However this is not working with
read-pattern=fifo. This patch fixes this problem.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: d57e39e8d3e8564003a1e2aadbd29c97286eb2d2.1458034554.git.berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-17 16:43:30 +01:00
Kevin Wolf
8896e08814 block: Use blk_co_pwritev() in blk_co_write_zeroes()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 16:30:00 +01:00
Kevin Wolf
57d6a42883 block: Use blk_aio_prwv() for aio_read/write/write_zeroes
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 16:30:00 +01:00
Kevin Wolf
a55d3fba99 block: Use blk_prw() in blk_pread()/blk_pwrite()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Kevin Wolf
fc1453cdfc block: Use blk_co_pwritev() in blk_write_zeroes()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Kevin Wolf
5bd5119667 block: Pull up blk_read_unthrottled() implementation
Use blk_read(), so that it goes through blk_co_preadv() like all read
requests from the BB to the BDS.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Kevin Wolf
a8823a3bfd block: Use blk_co_pwritev() for blk_write()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Kevin Wolf
1bf1cbc91f block: Use blk_co_preadv() for blk_read()
This patch introduces blk_co_preadv() as a central function on the
BlockBackend level that is supposed to handle all read requests from the
BB to its root BDS eventually.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Kevin Wolf
f21d96d04b block: Use BdrvChild in BlockBackend
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Max Reitz
9aaf28c61d block: Remove bdrv_states list
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Max Reitz
79720af640 block: Use bdrv_next() instead of bdrv_states
There is no point in manually iterating through the bdrv_states list
when there is bdrv_next().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:57 +01:00
Max Reitz
2626058034 block: Rewrite bdrv_next()
Instead of using the bdrv_states list, iterate over all the
BlockDriverStates attached to BlockBackends, and over all the
monitor-owned BDSs afterwards (except for those attached to a BB).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
981f4f578e block: Add blk_next_root_bs()
This function iterates over all BDSs attached to a BB. We are going to
need it when rewriting bdrv_next() so it no longer uses bdrv_states.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
262b4e8f74 block: Add bdrv_next_monitor_owned()
Add a function for iterating over all monitor-owned BlockDriverStates so
the generic block layer can do so.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
fe1a9cbc33 block: Move some bdrv_*_all() functions to BB
Move bdrv_commit_all() and bdrv_flush_all() to the BlockBackend level.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
7c735873d9 blockdev: Remove blk_hide_on_behalf_of_hmp_drive_del()
We can basically inline it in hmp_drive_del(); monitor_remove_blk() is
called already, so we just need to call bdrv_make_anon(), too.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
efaa7c4eeb blockdev: Split monitor reference from BB creation
Before this patch, blk_new() automatically assigned a name to the new
BlockBackend and considered it referenced by the monitor. This patch
removes the implicit monitor_add_blk() call from blk_new() (and
consequently the monitor_remove_blk() call from blk_delete(), too) and
thus blk_new() (and related functions) no longer take a BB name
argument.

In fact, there is only a single point where blk_new()/blk_new_open() is
called and the new BB is monitor-owned, and that is in blockdev_init().
Besides thus relieving us from having to invent names for all of the BBs
we use in qemu-img, this fixes a bug where qemu cannot create a new
image if there already is a monitor-owned BB named "image".

If a BB and its BDS tree are created in a single operation, as of this
patch the BDS tree will be created before the BB is given a name
(whereas it was the other way around before). This results in minor
change to the output of iotest 087, whose reference output is amended
accordingly.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
e5e785500b blockdev: Separate BB name management
Introduce separate functions (monitor_add_blk() and
monitor_remove_blk()) which set or unset a BB name. Since the name is
equivalent to the monitor's reference to a BB, adding a name the same as
declaring the BB to be monitor-owned and removing it revokes this
status, hence the function names.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
2cf22d6a1a blockdev: Add list of all BlockBackends
While monitor_block_backends contains nearly all BBs, we sometimes
really need all BBs. To this end, this patch adds the block_backend
list.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
9492b0b928 blockdev: Rename blk_backends
The blk_backends list does not contain all BlockBackends but only the
ones which are referenced by the monitor, and that is not necessarily
true for every BlockBackend. Rename the list to monitor_block_backends
to make that fact clear.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
d0e46a5577 block: Drop BB name from bad option error
The information which BB is concerned does not seem useful enough to
justify its existence in most other place (which may be related to qemu
printing the -drive parameter in question anyway, and for blockdev-add
the attribution is naturally unambiguous). Furthermore, as of a future
patch, bdrv_get_device_name(bs) will always return the empty string
before bdrv_open_inherit() returns.

Therefore, just dropping that information seems to be the best course of
action.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
a55448b368 qapi: Drop QERR_UNKNOWN_BLOCK_FORMAT_FEATURE
Just specifying a custom string is simpler in basically all places that
used it, and in addition, specifying the BB or node name is something we
generally do not do in other error messages when opening a BDS, so we
should not do it here.

This changes the output for iotest 036 (to the better, in my opinion),
so the reference output needs to be changed accordingly.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
da31d594cf block: Use blk_{commit,flush}_all() consistently
Replace bdrv_commmit_all() and bdrv_flush_all() by their BlockBackend
equivalents.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
1393f21270 block: Add blk_commit_all()
Later, we will remove bdrv_commit_all() and move its contents here, and
in order to replace bdrv_commit_all() calls by calls to blk_commit_all()
before doing so, we need to add it as an alias now.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
74d1b8fc27 block: Use blk_next() in block-backend.c
Instead of iterating directly through blk_backends, we can use
blk_next() instead. This gives us some abstraction from the list itself
which we can use to rename it, for example.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Max Reitz
da27a00e27 monitor: Use BB list for BB name completion
Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-17 15:47:56 +01:00
Kevin Wolf
f8746fb804 block: Fix memory leak in hmp_drive_add_node()
hmp_drive_add_node() leaked qdict in the error path when no node-name is
specified.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-03-17 15:47:56 +01:00
Kevin Wolf
23f7fcb295 block: Fix qemu_root_bds_opts.head initialisation
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-03-17 15:47:56 +01:00
Peter Maydell
331ac65963 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu 17 Mar 2016 11:08:28 GMT using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request:
  Revert "qed: Implement .bdrv_drain"
  aio-posix: Change CONFIG_EPOLL to CONFIG_EPOLL_CREATE1

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-17 11:27:54 +00:00
Stefan Hajnoczi
1f3ddfcb25 Revert "qed: Implement .bdrv_drain"
This reverts commit df9a681dc9.

Note that commit df9a681dc9 included some
unrelated hunks, possibly due to a merge failure or an overlooked
squash.  This only reverts the qed .bdrv_drain() implementation.

The qed .bdrv_drain() implementation is unsafe and can lead to a double
request completion.

Paolo Bonzini reports:
"The problem is that bdrv_qed_drain calls qed_plug_allocating_write_reqs
unconditionally, but this is not correct if an allocating write is
queued.  In this case, qed_unplug_allocating_write_reqs will restart the
allocating write and possibly cause it to complete.  The aiocb however
is still in use for the L2/L1 table writes, and will then be completed
again as soon as the table writes are stable."

For QEMU 2.6 we can simply revert this commit.  A full solution for the
qed need check timer may be added if the bdrv_drain() implementation is
extended.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1457431876-8475-1-git-send-email-stefanha@redhat.com
2016-03-17 09:50:14 +00:00
Matthew Fortune
147dfab747 aio-posix: Change CONFIG_EPOLL to CONFIG_EPOLL_CREATE1
CONFIG_EPOLL was being used to guard epoll_create1 which results
in build failures on CentOS 5.

Signed-off-by: Matthew Fortune <matthew.fortune@imgtec.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 6D39441BF12EF246A7ABCE6654B023536BB85D08@hhmail02.hh.imgtec.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-17 09:50:14 +00:00
Peter Maydell
8c45754724 Merge remote-tracking branch 'remotes/ehabkost/tags/machine-pull-request' into staging
Machine Core queue, 2016-03-16

# gpg: Signature made Wed 16 Mar 2016 18:57:34 GMT using RSA key ID 984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"

* remotes/ehabkost/tags/machine-pull-request:
  module: Rename machine_init() to opts_init()
  machine: Use type_init() to register machine classes

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-17 08:52:58 +00:00
Eduardo Habkost
34294e2f54 module: Rename machine_init() to opts_init()
The only remaining users of machine_init() only call
qemu_add_opts(). Rename machine_init() to opts_init() and move it
closer to the qemu_add_opts() calls on vl.c.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-03-16 15:54:23 -03:00
Eduardo Habkost
0e6aac87fd machine: Use type_init() to register machine classes
Change all machine_init() users that simply call type_register*()
to use type_init().

Cc: Evgeny Voevodin <e.voevodin@samsung.com>
Cc: Maksim Kozlov <m.kozlov@samsung.com>
Cc: Igor Mitsyanko <i.mitsyanko@gmail.com>
Cc: Dmitry Solodkiy <d.solodkiy@samsung.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Andrzej Zaborowski <balrogg@gmail.com>
Cc: Michael Walle <michael@walle.cc>
Cc: "Hervé Poussineau" <hpoussin@reactos.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Leon Alrae <leon.alrae@imgtec.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Blue Swirl <blauwirbel@gmail.com>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-03-16 15:34:05 -03:00
Peter Maydell
33616ace9f Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Wed 16 Mar 2016 17:33:44 GMT using RSA key ID C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"

* remotes/cody/tags/block-pull-request:
  MAINTAINERS: Fix typo, block/stream.h -> block/stream.c
  block/sheepdog: fix argument passed to qemu_strtoul()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 18:20:10 +00:00
Peter Maydell
d1f8764099 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160316-1' into staging
target-arm queue:
 * loader: Fix incorrect parameter name in load_image_mr()
 * Implement MRS (banked) and MSR (banked) instructions
 * virt: Implement versioning for machine model
 * i.MX: some initial patches preparing for i.MX6 support
 * new ASPEED AST2400 SoC and palmetto-bmc machine
 * bcm2835: add some more raspi2 devices
 * sd: fix segfault running "info qtree"

# gpg: Signature made Wed 16 Mar 2016 17:42:43 GMT using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"

* remotes/pmaydell/tags/pull-target-arm-20160316-1: (21 commits)
  sd: Fix "info qtree" on boards with SD cards
  bcm2835_dma: add emulation of Raspberry Pi DMA controller
  bcm2835_property: implement framebuffer control/configuration properties
  bcm2835_fb: add framebuffer device for Raspberry Pi
  bcm2835_aux: add emulation of BCM2835 AUX (aka UART1) block
  bcm2835_peripherals: enable sdhci pending-insert quirk for raspberry pi
  hw/arm: Add palmetto-bmc machine
  hw/arm: Add ASPEED AST2400 SoC model
  hw/intc: Add (new) ASPEED VIC device model
  hw/timer: Add ASPEED timer device model
  i.MX: Add missing descriptions in devices.
  i.MX: Add i.MX6 CCM and ANALOG device.
  i.MX: Add the CLK_IPG_HIGH clock
  i.MX: Remove CCM useless clock computation handling.
  i.MX: Rename CCM NOCLK to CLK_NONE for naming consistency.
  i.MX: Allow GPT timer to rollover.
  arm: virt: Move machine class init code to the abstract machine type
  arm: virt: Add an abstract ARM virt machine type
  target-arm: Fix translation level on early translation faults
  target-arm: Implement MRS (banked) and MSR (banked) instructions
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:43:37 +00:00
Peter Maydell
fec44a8c70 sd: Fix "info qtree" on boards with SD cards
The SD card object is not a SysBusDevice, so don't create it with
qdev_create() if we're not assigning it to a specific bus; use
object_new() instead.

This was causing 'info qtree' to segfault on boards with SD cards,
because qdev_create(NULL, TYPE_FOO) puts the created object on the
system bus, and then we may try to run functions like sysbus_dev_print()
on it, which fail when casting the object to SysBusDevice.

(This is the same mistake that we made with the NAND device
and fixed in commit 6749695eaaf346c1.)

Reported-by: xiaoqiang.zhao <zxq_yx_007@163.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: xiaoqiang.zhao <zxq_yx_007@163.com>
Message-id: 1458061009-7733-1-git-send-email-peter.maydell@linaro.org
2016-03-16 17:42:19 +00:00
Grégory ESTRADE
6717f587a4 bcm2835_dma: add emulation of Raspberry Pi DMA controller
At present, all DMA transfers complete inline (so a looping descriptor
queue will lock up the device). We also do not model pause/abort,
arbitrarion/priority, or debug features.

Signed-off-by: Grégory ESTRADE <gregory.estrade@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Message-id: 1457467526-8840-6-git-send-email-Andrew.Baumann@microsoft.com
[AB: implement 2D mode, cleanup/refactoring for upstream submission]
Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Grégory ESTRADE
355a8ccc5c bcm2835_property: implement framebuffer control/configuration properties
The property channel driver now interfaces with the framebuffer device
to query and set framebuffer parameters. As a result of this, the "get
ARM RAM size" query now correctly returns the video RAM base address
(not total RAM size), and the ram-size property is no longer relevant
here.

Signed-off-by: Grégory ESTRADE <gregory.estrade@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Message-id: 1457467526-8840-5-git-send-email-Andrew.Baumann@microsoft.com
[AB: cleanup/refactoring for upstream submission]
Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Grégory ESTRADE
5e9c2a8dac bcm2835_fb: add framebuffer device for Raspberry Pi
The framebuffer occupies the upper portion of memory (64MiB by
default), but it can only be controlled/configured via a system
mailbox or property channel (to be added by a subsequent patch).

Signed-off-by: Grégory ESTRADE <gregory.estrade@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Message-id: 1457467526-8840-4-git-send-email-Andrew.Baumann@microsoft.com
[AB: added Windows (BGR) support and cleanup/refactoring for upstream submission]
Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Andrew Baumann
97398d900c bcm2835_aux: add emulation of BCM2835 AUX (aka UART1) block
At present only the core UART functions (data path for tx/rx) are
implemented, which is enough for UEFI to boot. The following
features/registers are unimplemented:
  * Line/modem control
  * Scratch register
  * Extra control
  * Baudrate
  * SPI interfaces

Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1457467526-8840-3-git-send-email-Andrew.Baumann@microsoft.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Andrew Baumann
a2a8dfa8d8 bcm2835_peripherals: enable sdhci pending-insert quirk for raspberry pi
Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1457467526-8840-2-git-send-email-Andrew.Baumann@microsoft.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Andrew Jeffery
327d8e4ed2 hw/arm: Add palmetto-bmc machine
The new machine is a thin layer over the AST2400 ARM926-based SoC[1].
Between the minimal machine and the current SoC implementation there is
enough functionality to boot an aspeed_defconfig Linux kernel to
userspace. Nothing yet is specific to the Palmetto's BMC (other than
using an AST2400 SoC), but creating specific machine types is preferable
to a generic machine that doesn't match any particular hardware.

[1] http://www.aspeedtech.com/products.php?fPath=20&rId=376

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-id: 1458096317-25223-5-git-send-email-andrew@aj.id.au
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Andrew Jeffery
43e3346e43 hw/arm: Add ASPEED AST2400 SoC model
While the ASPEED AST2400 SoC[1] has a broad range of capabilities this
implementation is minimal, comprising an ARM926 processor, ASPEED VIC
and timer devices, and a 8250 UART.

[1] http://www.aspeedtech.com/products.php?fPath=20&rId=376

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-id: 1458096317-25223-4-git-send-email-andrew@aj.id.au
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Andrew Jeffery
0c69996e22 hw/intc: Add (new) ASPEED VIC device model
Implement a basic ASPEED VIC device model for the AST2400 SoC[1], with
enough functionality to boot an aspeed_defconfig Linux kernel. The model
implements the 'new' (revised) register set: While the hardware exposes
both the new and legacy register sets, accesses to the model's legacy
register set will not be serviced (however the access will be logged).

[1] http://www.aspeedtech.com/products.php?fPath=20&rId=376

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-id: 1458096317-25223-3-git-send-email-andrew@aj.id.au
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Andrew Jeffery
c04bd47db6 hw/timer: Add ASPEED timer device model
Implement basic ASPEED timer functionality for the AST2400 SoC[1]: Up to
8 timers can independently be configured, enabled, reset and disabled.
Some hardware features are not implemented, namely clock value matching
and pulse generation, but the implementation is enough to boot the Linux
kernel configured with aspeed_defconfig.

[1] http://www.aspeedtech.com/products.php?fPath=20&rId=376

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-id: 1458096317-25223-2-git-send-email-andrew@aj.id.au
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Jean-Christophe Dubois
eccfa35e9f i.MX: Add missing descriptions in devices.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: f1f565eb9dffdeb582feb1b15ba9e8b0afcf5468.1456868959.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Jean-Christophe Dubois
a66d815cd5 i.MX: Add i.MX6 CCM and ANALOG device.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 9fa80b4d8c5d0f50c94e77d74f952a7a665e168f.1456868959.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Jean-Christophe Dubois
d552f675fb i.MX: Add the CLK_IPG_HIGH clock
EPIT, GPT and other i.MX timers are using "abstract" clocks among which
a CLK_IPG_HIGH clock.

On i.MX25 and i.MX31 CLK_IPG and CLK_IPG_HIGH are mapped to the same clock
but on other SOC like i.MX6 they are mapped to distinct clocks.

This patch add the CLK_IPG_HIGH to prepare for SOC where these 2 clocks are
different.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 224bf650194760284cb40630e985867e1373276a.1456868959.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Jean-Christophe Dubois
f4b2add6cc i.MX: Remove CCM useless clock computation handling.
Most clocks supported by the CCM are useless to the qemu framework.

Only clocks related to timers (EPIT, GPT, PWM, WATCHDOG, ...) are usefull
to QEMU code.

Therefore this patch removes clock computation handling for all clocks but:
* CLK_NONE,
* CLK_IPG,
* CLK_32k

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 9e7222efb349801032e60c0f6b0fbad0e5dcf648.1456868959.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Jean-Christophe Dubois
c91a5883c3 i.MX: Rename CCM NOCLK to CLK_NONE for naming consistency.
This way all CCM clock defines/enums are named CLK_XXX

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 8537df765c1713625c7a8b9aca4c7ca60b42e0c0.1456868959.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Jean-Christophe Dubois
4833e15f74 i.MX: Allow GPT timer to rollover.
GPT timer need to rollover when it reaches 0xffffffff.

It also need to reset to 0 when in "restart mode" and crossing the
compare 1 register.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 6e2b36117a249a78bf822dd59a390368f407136e.1456868959.git.jcd@tribudubois.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Wei Huang
9c94d8e6c9 arm: virt: Move machine class init code to the abstract machine type
This patch moves the common class initialization code from
"virt-2.6" to the new abstract class. An empty property is added to
"virt-2.6" machine. In the meanwhile, related funtions are renamed
to "virt_2_6_*" for consistency.

Signed-off-by: Wei Huang <wei@redhat.com>
Message-id: 1457717778-17727-3-git-send-email-wei@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Wei Huang
ed796373b4 arm: virt: Add an abstract ARM virt machine type
In preparation for future ARM virt machine types, this patch creates
an abstract type for all ARM machines. The current machine type in
QEMU (i.e. "virt") is renamed to "virt-2.6", whose naming scheme is
similar to other architectures. For the purpose of backward compatibility,
"virt" is converted to an alias, pointing to "virt-2.6". With this patch,
"qemu -M ?" lists the following virtual machine types along with others:

virt                 QEMU 2.6 ARM Virtual Machine (alias of virt-2.6)
virt-2.6             QEMU 2.6 ARM Virtual Machine

Signed-off-by: Wei Huang <wei@redhat.com>
Message-id: 1457717778-17727-2-git-send-email-wei@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Sergey Sorokin
1b4093ea66 target-arm: Fix translation level on early translation faults
Qemu reports translation fault on 1st level instead of 0th level in case of
AArch64 address translation if the translation table walk is disabled or
the address is in the gap between the two regions.

Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
Message-id: 1457527503-25958-1-git-send-email-afarallax@yandex.ru
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:42:18 +00:00
Jeff Cody
773460256b MAINTAINERS: Fix typo, block/stream.h -> block/stream.c
There is no block/stream.h, the intended filename is block/stream.c
instead.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: b9feeac95301c1b0b1c28a485da5e3781370c31a.1457578261.git.jcody@redhat.com
2016-03-16 13:25:29 -04:00
Jeff Cody
03c698f0a2 block/sheepdog: fix argument passed to qemu_strtoul()
The function qemu_strtoul() reads 'unsigned long' sized data,
which is larger than uint32_t on 64-bit machines.

Even though the snap_id field in the header is 32-bits, we must
accommodate the full size in qemu_strtoul().

This patch also adds more meaningful error handling to the
qemu_strtoul() call, and subsequent results.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Message-id: e56fc50abedd9a112e0683342c8eafda063cd2f9.1456935548.git.jcody@redhat.com
2016-03-16 13:25:29 -04:00
Peter Maydell
8bfd0550be target-arm: Implement MRS (banked) and MSR (banked) instructions
Starting with the ARMv7 Virtualization Extensions, the A32 and T32
instruction sets provide instructions "MSR (banked)" and "MRS
(banked)" which can be used to access registers for a mode other
than the current one:
 * R<m>_<mode>
 * ELR_hyp
 * SPSR_<mode>

Implement the missing instructions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1456762734-23939-1-git-send-email-peter.maydell@linaro.org
2016-03-16 17:05:58 +00:00
Jens Wiklander
f09f9bd9fa loader: Fix incorrect parameter name in load_image_mr() macro
Fix a typo in the load_image_mr() macro: 'mr' was written when
the parameter name is '_mr'. (This had no visible effects since
the single use of the macro used 'mr' as the argument.)

Fixes 76151cacfe "loader: Add
load_image_mr() to load ROM image to a MemoryRegion"

Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 17:05:58 +00:00
Peter Maydell
0ebc03bc06 util/base64.c: Clean includes
Remove unnecessary include of config-host.h.
(This was missed by the clean-includes script because of the
incorrect use of <> for a QEMU header.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 1456237112-32662-5-git-send-email-peter.maydell@linaro.org
2016-03-16 12:48:11 +00:00
Peter Maydell
8bc92a762a update-linux-headers.sh: Fake types.h doesn't need to include anything
We have a fake linux/types.h which we create in update-linux-headers.h.
Now that every QEMU source file includes osdep.h, this fake header
doesn't need to include anything at all.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 1456237112-32662-4-git-send-email-peter.maydell@linaro.org
2016-03-16 12:48:11 +00:00
Peter Maydell
8816c600d3 include/config.h: Remove
include/config.h just includes config-target.h (and used to also
include config-host.h).
It is now obsolete and unused, because osdep.h does this job, so
remove it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 1456237112-32662-3-git-send-email-peter.maydell@linaro.org
2016-03-16 12:48:11 +00:00
Peter Maydell
4674da1c49 slirp/slirp.h: Remove now-empty #ifdefs
After automatic cleanup to remove unnecessary #includes of headers that
osdep.h provides, slirp.h has a few now unnecessary #ifdef/#endif pairs;
remove them.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 1456237112-32662-2-git-send-email-peter.maydell@linaro.org
2016-03-16 12:48:11 +00:00
Peter Maydell
6aeda86890 Merge remote-tracking branch 'remotes/armbru/tags/pull-error-2016-03-16' into staging
Error reporting patches for 2016-03-16

# gpg: Signature made Wed 16 Mar 2016 09:57:00 GMT using RSA key ID EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"

* remotes/armbru/tags/pull-error-2016-03-16:
  error: ensure errno detail is printed with error_abort

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 11:09:36 +00:00
Peter Maydell
cad0b273e5 Merge remote-tracking branch 'remotes/armbru/tags/pull-monitor-2016-03-16' into staging
Monitor patches for 2016-03-16

# gpg: Signature made Wed 16 Mar 2016 09:47:23 GMT using RSA key ID EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"

* remotes/armbru/tags/pull-monitor-2016-03-16:
  qdev-monitor: add missing aliases for virtio device classes
  qdev-monitor: sort alias table by typename
  qdev-monitor: improve error message when alias device is unavailable

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 10:38:15 +00:00
Peter Maydell
f235538e38 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.6-20160316' into staging
ppc patch queue for 2016-03-16

Accumulated patches for target-ppc, pseries machine type and related
devices.  As we are now in soft freeze, these are mostly fixes.
   * Fix KVM migration for several SPRs that qemu didn't handle
   * Clean up handling of SDR1, which allows a fix to the gdbstub
   * Fix a race in spapr_rng
   * Fix a bug with multifunction hotplug

The exception is the 7 patches to allow EEH on spapr-pci-host-bridge
devices (rather than the special and poorly designed
spapr-vfio-pci-host-bridge device).  I believe these are low risk of
breaking non-EEH cases, and EEH cases were little used in practice
previously (since libvirt did not support the special device amongst
other things).  It did have a draft posted before the soft freeze,
removes a very ugly VFIO interface, and removes device we'd like to
deprecate sooner rather than later.  So, I'm hoping we can squeeze
these in during the soft freeze.

This includes two patches to the VFIO code, which Alex Williamson has
indicated he's ok with coming through my tree.

# gpg: Signature made Wed 16 Mar 2016 05:04:52 GMT using RSA key ID 20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.6-20160316:
  vfio: Eliminate vfio_container_ioctl()
  spapr_pci: Remove finish_realize hook
  spapr_pci: (Mostly) remove spapr-pci-vfio-host-bridge
  spapr_pci: Allow EEH on spapr-pci-host-bridge
  spapr_pci: Eliminate class callbacks
  spapr_pci: Switch to vfio_eeh_as_op() interface
  vfio: Start improving VFIO/EEH interface
  spapr_rng: fix race with main loop
  target-ppc: Eliminate kvmppc_kern_htab global
  target-ppc: Add helpers for updating a CPU's SDR1 and external HPT
  target-ppc: Split out SREGS get/put functions
  spapr_pci: fix multifunction hotplug
  target-ppc: Add PVR for POWER8NVL processor
  ppc: Add a few more P8 PMU SPRs
  ppc: Fix migration of the TAR SPR
  ppc: Define the PSPB register on POWER8

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 10:09:26 +00:00
Daniel P. Berrange
20e2dec149 error: ensure errno detail is printed with error_abort
When &error_abort is passed in, the error reporting code
will print the current error message and then abort() the
process. Unfortunately at the time it aborts, we've not
yet appended the errno detail. This makes debugging certain
problems significantly harder as the log is incomplete.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1457544504-8548-22-git-send-email-berrange@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-16 10:55:51 +01:00
Peter Maydell
af1d3ebbef Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
acpi: minor fix

Since previous pull acpi test triggers warnings,
fix it up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 15 Mar 2016 21:26:38 GMT using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream:
  acpi-test: update UID for GSI links

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-16 09:27:58 +00:00
Sascha Silbe
588c36cac7 qdev-monitor: add missing aliases for virtio device classes
virtio-{blk,balloon,net,serial} are aliases for their actual,
architecture-dependent implementations (*-ccw on s390x, *-pci on other
architectures supporting virtio). This makes it a lot easier to craft
qemu invocations that work on all supported architectures. Complete
the set to cover all existing non-abstract virtio device classes.

For virtio-balloon, only the CCW implementation was missing.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1455831854-49013-4-git-send-email-silbe@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-16 10:13:10 +01:00
Sascha Silbe
36e9916811 qdev-monitor: sort alias table by typename
Sort the alias table by typename so it's easier to see which aliases
exist.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1455831854-49013-3-git-send-email-silbe@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-16 10:13:10 +01:00
Sascha Silbe
f6b5319d41 qdev-monitor: improve error message when alias device is unavailable
When trying to instantiate an alias that points to a device class that
doesn't exist, the error message looks like qemu misunderstood the
request:

$ s390x-softmmu/qemu-system-s390x -device virtio-gpu
qemu-system-s390x: -device virtio-gpu: 'virtio-gpu-ccw' is not a valid
device model name

Special-case the error message to make it explicit that alias
expansion is going on:

$ s390x-softmmu/qemu-system-s390x -device virtio-gpu
qemu-system-s390x: -device virtio-gpu: 'virtio-gpu' (alias
'virtio-gpu-ccw') is not a valid device model name

Suggested-By: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1455831854-49013-2-git-send-email-silbe@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-16 10:13:10 +01:00
David Gibson
3356128cd1 vfio: Eliminate vfio_container_ioctl()
vfio_container_ioctl() was a bad interface that bypassed abstraction
boundaries, had semantics that sat uneasily with its name, and was unsafe
in many realistic circumstances.  Now that spapr-pci-vfio-host-bridge has
been folded into spapr-pci-host-bridge, there are no more users, so remove
it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-16 09:55:11 +11:00
David Gibson
a36304fdca spapr_pci: Remove finish_realize hook
Now that spapr-pci-vfio-host-bridge is reduced to just a stub, there is
only one implementation of the finish_realize hook in sPAPRPHBClass.  So,
we can fold that implementation into its (single) caller, and remove the
hook.  That's the last thing left in sPAPRPHBClass, so that can go away as
well.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2016-03-16 09:55:11 +11:00
David Gibson
72700d7e73 spapr_pci: (Mostly) remove spapr-pci-vfio-host-bridge
Now that the regular spapr-pci-host-bridge can handle EEH, there are only
two things that spapr-pci-vfio-host-bridge does differently:
    1. automatically sizes its DMA window to match the host IOMMU
    2. checks if the attached VFIO container is backed by the
       VFIO_SPAPR_TCE_IOMMU type on the host

(1) is not particularly useful, since the default window used by the
regular host bridge will work with the host IOMMU configuration on all
current systems anyway.

Plus, automatically changing guest visible configuration (such as the DMA
window) based on host settings is generally a bad idea.  It's not
definitively broken, since spapr-pci-vfio-host-bridge is only supposed to
support VFIO devices which can't be migrated anyway, but still.

(2) is not really useful, because if a guest tries to configure EEH on a
different host IOMMU, the first call will fail and that will be that.

It's possible there are scripts or tools out there which expect
spapr-pci-vfio-host-bridge, so we don't remove it entirely.  This patch
reduces it to just a stub for backwards compatibility.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2016-03-16 09:55:11 +11:00
David Gibson
c1fa017c7e spapr_pci: Allow EEH on spapr-pci-host-bridge
Now that the EEH code is independent of the special
spapr-vfio-pci-host-bridge device, we can allow it on all spapr PCI
host bridges instead.  We do this by changing spapr_phb_eeh_available()
to be based on the vfio_eeh_as_ok() call instead of the host bridge class.

Because the value of vfio_eeh_as_ok() can change with devices being
hotplugged or unplugged, this can potentially lead to some strange edge
cases where the guest starts using EEH, then it starts failing because
of a change in status.

However, it's not really any worse than the current situation.  Cases that
would have worked previously will still work (i.e. VFIO devices from at
most one VFIO IOMMU group per vPHB), it's just that it's no longer
necessary to use spapr-vfio-pci-host-bridge with the groupid pre-specified.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2016-03-16 09:55:11 +11:00
David Gibson
fbb4e98341 spapr_pci: Eliminate class callbacks
The EEH operations in the spapr-vfio-pci-host-bridge no longer rely on the
special groupid field in sPAPRPHBVFIOState.  So we can simplify, removing
the class specific callbacks with direct calls based on a simple
spapr_phb_eeh_enabled() helper.  For now we implement that in terms of
a boolean in the class, but we'll continue to clean that up later.

On its own this is a rather strange way of doing things, but it's a useful
intermediate step to further cleanups.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2016-03-16 09:55:10 +11:00
David Gibson
76a9e9f680 spapr_pci: Switch to vfio_eeh_as_op() interface
This switches all EEH on VFIO operations in spapr_pci_vfio.c from the
broken vfio_container_ioctl() interface to the new vfio_as_eeh_op()
interface.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2016-03-16 09:55:10 +11:00
David Gibson
3153119e9b vfio: Start improving VFIO/EEH interface
At present the code handling IBM's Enhanced Error Handling (EEH) interface
on VFIO devices operates by bypassing the usual VFIO logic with
vfio_container_ioctl().  That's a poorly designed interface with unclear
semantics about exactly what can be operated on.

In particular it operates on a single vfio container internally (hence the
name), but takes an address space and group id, from which it deduces the
container in a rather roundabout way.  groupids are something that code
outside vfio shouldn't even be aware of.

This patch creates new interfaces for EEH operations.  Internally we
have vfio_eeh_container_op() which takes a VFIOContainer object
directly.  For external use we have vfio_eeh_as_ok() which determines
if an AddressSpace is usable for EEH (at present this means it has a
single container with exactly one group attached), and vfio_eeh_as_op()
which will perform an operation on an AddressSpace in the unambiguous case,
and otherwise returns an error.

This interface still isn't great, but it's enough of an improvement to
allow a number of cleanups in other places.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-16 09:55:10 +11:00
Greg Kurz
f1a6cf3ef7 spapr_rng: fix race with main loop
Since commit "60253ed1e6ec rng: add request queue support to rng-random",
the use of a spapr_rng device may hang vCPU threads.

The following path is taken without holding the lock to the main loop mutex:

h_random()
  rng_backend_request_entropy()
    rng_random_request_entropy()
      qemu_set_fd_handler()

The consequence is that entropy_available() may be called before the vCPU
thread could even queue the request: depending on the scheduling, it may
happen that entropy_available() does not call random_recv()->qemu_sem_post().
The vCPU thread will then sleep forever in h_random()->qemu_sem_wait().

This could not happen before 60253ed1e6 because entropy_available() used
to call random_recv() unconditionally.

This patch ensures the lock is held to avoid the race.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Cédric Le Goater <clg@fr.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-03-16 09:55:06 +11:00
David Gibson
c18ad9a54b target-ppc: Eliminate kvmppc_kern_htab global
fa48b43 "target-ppc: Remove hack for ppc_hash64_load_hpte*() with HV KVM"
purports to remove a hack in the handling of hash page tables (HPTs)
managed by KVM instead of qemu.  However, it actually went in the wrong
direction.

That patch requires anything looking for an external HPT (that is one not
managed by the guest itself) to check both env->external_htab (for a qemu
managed HPT) and kvmppc_kern_htab (for a KVM managed HPT).  That's a
problem because kvmppc_kern_htab is local to mmu-hash64.c, but some places
which need to check for an external HPT are outside that, such as
kvm_arch_get_registers().  The latter was subtly broken by the earlier
patch such that gdbstub can no longer access memory.

Basically a KVM managed HPT is much more like a qemu managed HPT than it is
like a guest managed HPT, so the original "hack" was actually on the right
track.

This partially reverts fa48b43, so we again mark a KVM managed external HPT
by putting a special but non-NULL value in env->external_htab.  It then
goes further, using that marker to eliminate the kvmppc_kern_htab global
entirely.  The ppc_hash64_set_external_hpt() helper function is extended
to set that marker if passed a NULL value (if you're setting an external
HPT, but don't have an actual HPT to set, the assumption is that it must
be a KVM managed HPT).

This also has some flow-on changes to the HPT access helpers, required by
the above changes.

Reported-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Tested-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
2016-03-16 09:55:06 +11:00
David Gibson
e5c0d3ce40 target-ppc: Add helpers for updating a CPU's SDR1 and external HPT
When a Power cpu with 64-bit hash MMU has it's hash page table (HPT)
pointer updated by a write to the SDR1 register we need to update some
derived variables.  Likewise, when the cpu is configured for an external
HPT (one not in the guest memory space) some derived variables need to be
updated.

Currently the logic for this is (partially) duplicated in ppc_store_sdr1()
and in spapr_cpu_reset().  In future we're going to need it in some other
places, so make some common helpers for this update.

In addition the new ppc_hash64_set_external_hpt() helper also updates
SDR1 in KVM - it's not updated by the normal runtime KVM <-> qemu CPU
synchronization.  In a sense this belongs logically in the
ppc_hash64_set_sdr1() helper, but that is called from
kvm_arch_get_registers() so can't itself call cpu_synchronize_state()
without infinite recursion.  In practice this doesn't matter because
the only other caller is TCG specific.

Currently there aren't situations where updating SDR1 at runtime in KVM
matters, but there are going to be in future.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-16 09:55:06 +11:00
David Gibson
a7a00a729a target-ppc: Split out SREGS get/put functions
Currently the getting and setting of Power MMU registers (sregs) take up
large inline chunks of the kvm_arch_get_registers() and
kvm_arch_put_registers() functions.  Especially since there are two
variants (for Book-E and Book-S CPUs), only one of which will be used in
practice, this is pretty hard to read.

This patch splits these out into helper functions for clarity.  No
functional change is expected.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
2016-03-16 09:55:05 +11:00
Michael Roth
788d2599de spapr_pci: fix multifunction hotplug
Since 3f1e147, QEMU has adopted a convention of supporting function
hotplug by deferring hotplug events until func 0 is hotplugged.
This is likely how management tools like libvirt would expose
such support going forward.

Since sPAPR guests rely on per-func events rather than
slot-based, our protocol has been to hotplug func 0 *first* to
avoid cases where devices appear within guests without func 0
present to avoid undefined behavior.

To remain compatible with new convention, defer hotplug in a
similar manner, but then generate events in 0-first order as we
did in the past. Once func 0 present, fail any attempts to plug
additional functions (as we do with PCIe).

For unplug, defer unplug operations in a similar manner, but
generate unplug events such that function 0 is removed last in guest.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-03-16 09:55:05 +11:00
Alexey Kardashevskiy
a88dced8eb target-ppc: Add PVR for POWER8NVL processor
This adds a new POWER8+NVLink CPU PVR which core is identical to POWER8
but has a different PVR. The only available machine now has PVR
pvr 004c 0100 so this defines "POWER8NVL" alias as v1.0.

The corresponding kernel commit is
https://github.com/torvalds/linux/commit/ddee09c099c3
"powerpc: Add PVR for POWER8NVL processor"

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-03-16 09:55:05 +11:00
Benjamin Herrenschmidt
14646457ae ppc: Add a few more P8 PMU SPRs
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-03-16 09:55:05 +11:00
Thomas Huth
1e440cbc99 ppc: Fix migration of the TAR SPR
The TAR special purpose register currently does not get migrated
under KVM because it does not get synchronized with the kernel.
Use spr_register_kvm() instead of spr_register() to fix this issue.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-03-16 09:55:05 +11:00
Thomas Huth
d6f1445faf ppc: Define the PSPB register on POWER8
POWER8 / PowerISA 2.07 has a new special purpose register called PSPB
("Problem State Priority Boost Register"). The contents of this register
are currently lost during migration. To be able to migrate this register,
too, we've got to define this SPR along with the other SPRs of POWER8.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-03-16 09:55:05 +11:00
Michael S. Tsirkin
3ba6a710e6 acpi-test: update UID for GSI links
Update acpi test data to match
commit 6a991e07bb
("hw/acpi: fix GSI links UID").

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-15 23:25:52 +02:00
Peter Maydell
4caecccbc1 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* Miscellaneous exec.c fixes (Markus, myself)
* Q35 support for -machine kernel_irqchip=split (Rita)
* Chardev replay support (Pavel)
* icount "warping" cleanups (Pavel)

# gpg: Signature made Tue 15 Mar 2016 17:24:08 GMT using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"

* remotes/bonzini/tags/for-upstream:
  icount: decouple warp calls
  icount: remove obsolete warp call
  replay: character devices
  exec: fix early return from ram_block_add
  exec: Fix memory allocation when memory path isn't on hugetlbfs
  exec: Fix memory allocation when memory path names new file
  update-linux-headers: Add userfaultfd.h
  kvm: x86: q35: Add support for -machine kernel_irqchip=split for q35

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-15 17:56:14 +00:00
Pavel Dovgalyuk
e76d1798fa icount: decouple warp calls
qemu_clock_warp function is called to update virtual clock when CPU
is sleeping. This function includes replay checkpoint to make execution
deterministic in icount mode.
Record/replay module flushes async event queue at checkpoints.
Some of the events (e.g., block devices operations) include interaction
with hardware. E.g., APIC polled by block devices sets one of IRQ flags.
Flag to be set depends on currently executed thread (CPU or iothread).
Therefore in replay mode we have to process the checkpoints in the same thread
as they were recorded.
qemu_clock_warp function (and its checkpoint) may be called from different
thread. This patch decouples two different execution cases of this function:
call when CPU is sleeping from iothread and call from cpu thread to update
virtual clock.
First task is performed by qemu_start_warp_timer function. It sets warp
timer event to the moment of nearest pending virtual timer.
Second function (qemu_account_warp_timer) is called from cpu thread
before execution of the code. It advances virtual clock by adding the length
of period while CPU was sleeping.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <20160310115609.4812.44986.stgit@PASHA-ISP>
[Update docs. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:45 +01:00
Pavel Dovgalyuk
281b2201e4 icount: remove obsolete warp call
qemu_clock_warp call in qemu_tcg_wait_io_event function is not needed
anymore, because it is called in every iteration of main_loop_wait.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <20160310115603.4812.67559.stgit@PASHA-ISP>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:42 +01:00
Pavel Dovgalyuk
33577b47c6 replay: character devices
This patch implements record and replay of character devices.
It records chardevs communication in replay mode. Recorded information
include data read from backend and counter of bytes written
from frontend to backend to preserve frontend internal state.
If character device was configured through the command line in record mode,
then in replay mode it should be also added to command line. Backend of
the character device could be changed in replay mode.
Replaying of devices that perform ioctl and get_msgfd operations is not
supported.
gdbstub which also acts as a backend is not recorded to allow controlling
the replaying through gdb. Monitor backends are also not recorded.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <20160314074436.4980.83856.stgit@PASHA-ISP>
[Add stubs. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:40 +01:00
Paolo Bonzini
39c350ee12 exec: fix early return from ram_block_add
After reporting an error, ram_block_add was going on with the registration
of the RAMBlock.  The visible effect is that it unlocked the ramlist
mutex twice.

Fixes: 528f46af6e
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:33 +01:00
Markus Armbruster
e1fb647199 exec: Fix memory allocation when memory path isn't on hugetlbfs
gethugepagesize() works reliably only when its argument is on
hugetlbfs.  When it's not, it returns the filesystem's "optimal
transfer block size", which may or may not be the actual page size
you'll get when you mmap().

If the value is too small or not a power of two, we fail
qemu_ram_mmap()'s assertions.  These were added in commit 794e8f3
(v2.5.0).  The bug's impact before that is currently unknown.  Seems
fairly unlikely at least when the normal page size is 4KiB.

Else, if the value is too large, we align more strictly than
necessary.

gethugepagesize() goes back to commit c902760 (v0.13).  That commit
clearly intended gethugepagesize() to be used on hugetlbfs only.  Not
only was it named accordingly, it also printed a warning when used on
anything else.  However, the commit neglected to spell out the
restriction in user documentation of -mem-path.

Commit bfc2a1a (v2.5.0) dropped the warning as bogus "because QEMU
functions perfectly well with the path on a regular tmpfs filesystem".
It sure does when you're sufficiently lucky.  In my testing, I was
lucky, too.

Fix by switching to qemu_fd_getpagesize().  Rename the variable
holding its result from hpagesize to page_size.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1457378754-21649-3-git-send-email-armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:33 +01:00
Markus Armbruster
fd97fd4408 exec: Fix memory allocation when memory path names new file
Commit 8d31d6b extended file_ram_alloc() to accept file names in
addition to directory names.  Even though it passes O_CREAT to open(),
it actually works only for existing files.  Reproducer adapted from
the commit's qemu-doc.texi update:

    $ qemu-system-x86_64 -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1
    qemu-system-x86_64: -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1: failed to get page size of file /dev/hugepages/my-shmem-file: No such file or directory

This is because we first get the page size for @path, then open the
actual file.  Unwise even before the flawed commit, because the
directory could change in between, invalidating the page size.
Unlikely to bite in practice.

Rearrange the code to create the file (if necessary) before getting
its page size.  Carefully avoid TOCTTOU conditions with a method
suggested by Paolo Bonzini.

While there, replace "hugepages" by "guest RAM" in error messages,
because host memory backends can be used for purposes other than huge
pages, e.g. /dev/shm/ shared memory.  Help text of -mem-path agrees.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1457378754-21649-2-git-send-email-armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:33 +01:00
Alexey Kardashevskiy
2ae823d4f7 update-linux-headers: Add userfaultfd.h
userfailtfd.h is used by post-copy migration so include it to
the update-linux-headers.sh as we want it updated altogether with
other kernel headers.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <1455512381-15271-1-git-send-email-aik@ozlabs.ru>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:33 +01:00
Rita Sinha
b094f2e015 kvm: x86: q35: Add support for -machine kernel_irqchip=split for q35
The split IRQ chip mode via KVM_CAP_SPLIT_IRQCHIP was introduced with commit
15eafc2e60 but was broken for q35. This patch makes kernel_irqchip=split
functional for q35.

Signed-off-by: Rita Sinha <rita.sinha89@gmail.com>
Message-Id: <1457378525-16455-1-git-send-email-rita.sinha89@gmail.com>
Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-15 18:23:33 +01:00
Peter Maydell
a6cdb77f81 Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging
slirp: Adding IPv6 support to Qemu -net user mode

# gpg: Signature made Tue 15 Mar 2016 16:06:03 GMT using RSA key ID FB6B2F1D
# gpg: Good signature from "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: F632 74CD C630 0873 CB3D  29D9 E3E5 1CE8 FB6B 2F1D

* remotes/thibault/tags/samuel-thibault:
  slirp: Add IPv6 support to the TFTP code
  qapi-schema, qemu-options & slirp: Adding Qemu options for IPv6 addresses
  slirp: Adding IPv6 address for DNS relay
  slirp: Handle IPv6 in TCP functions
  slirp: Reindent after refactoring
  slirp: Generalizing and neutralizing various TCP functions before adding IPv6 stuff
  slirp: Factorizing tcpiphdr structure with an union
  slirp: Adding IPv6 UDP support
  slirp: Adding ICMPv6 error sending
  slirp: Fix ICMP error sending
  slirp: Adding IPv6, ICMPv6 Echo and NDP autoconfiguration

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-15 17:09:52 +00:00
Peter Maydell
a58a4cb187 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
vhost, virtio, pci, pc, acpi

nvdimm work
sparse cpu id rework
ipmi enhancements
fixes all over the place
pxb option to tweak chassis number

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 15 Mar 2016 14:33:10 GMT using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream: (51 commits)
  hw/acpi: fix GSI links UID
  ipmi: add some local variables in ipmi_sdr_init
  ipmi: remove the need of an ending record in the SDR table
  ipmi: use a function to initialize the SDR table
  ipmi: add a realize function to the device class
  ipmi: add rsp_buffer_set_error() helper
  ipmi: remove IPMI_CHECK_RESERVATION() macro
  ipmi: replace IPMI_ADD_RSP_DATA() macro with inline helpers
  ipmi: remove IPMI_CHECK_CMD_LEN() macro
  MAINTAINERS: machine core
  MAINTAINERS: Add an entry for virtio header files
  pc: acpi: clarify why possible LAPIC entries must be present in MADT
  pc: acpi: drop cpu->found_cpus bitmap
  pc: acpi: create Processor and Notify objects only for valid lapics
  pc: acpi: create MADT.lapic entries only for valid lapics
  pc: acpi: SRAT: create only valid processor lapic entries
  pc: acpi: cleanup qdev_get_machine() calls
  machine: introduce MachineClass.possible_cpu_arch_ids() hook
  pc: init pcms->apic_id_limit once and use it throughout pc.c
  pc: acpi: remove NOP assignment
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-15 16:43:48 +00:00
Thomas Huth
fad7fb9ccd slirp: Add IPv6 support to the TFTP code
Add the handler code for incoming TFTP packets to udp6_input(),
and make sure that the TFTP code can send packets with both,
udp_output() and udp6_output() by introducing a wrapper function
called tftp_udp_output().

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2016-03-15 17:05:34 +01:00
Peter Maydell
f84d587111 Merge remote-tracking branch 'remotes/berrange/tags/pull-io-next-2016-03-15-1' into staging
Merge I/O fixes

# gpg: Signature made Tue 15 Mar 2016 14:42:43 GMT using RSA key ID 15104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"

* remotes/berrange/tags/pull-io-next-2016-03-15-1:
  io: stronger check for support for IPv4/6

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-15 15:51:06 +00:00
Marcel Apfelbaum
6a991e07bb hw/acpi: fix GSI links UID
According to the ACPI spec, each UID must be unique.
Use the irq number as UID for GSI links.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-15 16:16:57 +02:00
Daniel P. Berrange
cfd47a71df io: stronger check for support for IPv4/6
Instead of just checking for bind(), also check whether
getaddrinfo can resolve IPv6 addresses. This catches
failure when travis runs QEMU builds inside minimal
docker containers

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-15 13:55:52 +00:00
Peter Maydell
d41e0bed7b Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
X86 fixes

# gpg: Signature made Mon 14 Mar 2016 20:26:25 GMT using RSA key ID 984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"

* remotes/ehabkost/tags/x86-pull-request:
  kvm: Remove x2apic feature from CPU model when kernel_irqchip is off
  hyperv: cpu hotplug fix with HyperV enabled

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-15 11:05:37 +00:00
Peter Maydell
9828f9b6c8 Merge remote-tracking branch 'remotes/rth/tags/pull-i386-20160314' into staging
target-i386 fixes

# gpg: Signature made Mon 14 Mar 2016 17:54:06 GMT using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-i386-20160314:
  target-i386: Dump unknown opcodes with -d unimp
  target-i386: Fix inhibit irq mask handling
  target-i386: Use gen_nop_modrm for prefetch instructions
  target-i386: Fix addr16 prefix
  target-i386: Fix SMSW for 64-bit mode
  target-i386: Fix SMSW and LMSW from/to register
  target-i386: Avoid repeated calls to the bnd_jmp helper

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-15 10:08:12 +00:00
Yann Bordenave
7aac531ef2 qapi-schema, qemu-options & slirp: Adding Qemu options for IPv6 addresses
This patch adds parameters to manage some new options in the qemu -net
command.
Slirp IPv6 address, network prefix, and DNS IPv6 address can be given in
argument to the qemu command.
Defaults parameters are respectively fec0::2, fec0::, /64 and fec0::3.

Signed-off-by: Yann Bordenave <meow@meowstars.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:25 +01:00
Guillaume Subiron
05061d8548 slirp: Adding IPv6 address for DNS relay
This patch adds an IPv6 address to the DNS relay. in6_equal_dns() is
developed using this Slirp attribute.
sotranslate_in/out/accept() are also updated to manage the IPv6 case so the
guest can be able to join the host using one of the Slirp addresses.

For now this only points to localhost. Further development will be needed to
automatically fetch the IPv6 address from resolv.conf, and announce this via
RDNSS.

Signed-off-by: Guillaume Subiron <maethor@subiron.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:22 +01:00
Guillaume Subiron
3feea4447f slirp: Handle IPv6 in TCP functions
This patch adds IPv6 case in TCP functions refactored by the last
patches.
This also adds IPv6 pseudo-header in tcpiphdr structure.
Finally, tcp_input() is called by ip6_input().

Signed-off-by: Guillaume Subiron <maethor@subiron.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:19 +01:00
Guillaume Subiron
1252cf40a8 slirp: Reindent after refactoring
No code change.

Signed-off-by: Guillaume Subiron <maethor@subiron.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:17 +01:00
Guillaume Subiron
9dfbf250d2 slirp: Generalizing and neutralizing various TCP functions before adding IPv6 stuff
Basically, this patch adds some switch in various TCP functions to
prepare them for the IPv6 case.

To have something to "switch" in tcp_input() and tcp_respond(), a new
argument is used to give them the sa_family of the addresses they are
working on.

This patch does not include the entailed reindentation, to make proofread
easier. Reindentation is adressed in the following no-op patch.

Signed-off-by: Guillaume Subiron <maethor@subiron.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:14 +01:00
Guillaume Subiron
98c63057d2 slirp: Factorizing tcpiphdr structure with an union
This patch factorizes the tcpiphdr structure to put the IPv4 fields in
an union, for addition of version 6 in further patch.
Using some macros, retrocompatibility of the existing code is assured.

This patch also fixes the SLIRP_MSIZE and margin computation in various
functions, and makes them compatible with the new tcpiphdr structure,
whose size will be bigger than sizeof(struct tcphdr) + sizeof(struct ip)

Signed-off-by: Guillaume Subiron <maethor@subiron.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:11 +01:00
Guillaume Subiron
15d62af4b6 slirp: Adding IPv6 UDP support
This adds the sin6 case in the fhost and lhost unions and related macros.
It adds udp6_input() and udp6_output().
It adds the IPv6 case in sorecvfrom().
Finally, udp_input() is called by ip6_input().

Signed-off-by: Guillaume Subiron <maethor@subiron.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:08 +01:00
Yann Bordenave
fc6c9257c6 slirp: Adding ICMPv6 error sending
Adding icmp6_send_error to send ICMPv6 Error messages. This function is
simpler than the v4 version.
Adding some calls in various functions to send ICMP errors, when a
received packet is too big, or when its hop limit is 0.

Signed-off-by: Yann Bordenave <meow@meowstars.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:04 +01:00
Yann Bordenave
de40abfecf slirp: Fix ICMP error sending
Disambiguation : icmp_error is renamed into icmp_send_error, since it
doesn't manage errors, but only sends ICMP Error messages.

Signed-off-by: Yann Bordenave <meow@meowstars.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:02 +01:00
Guillaume Subiron
0d6ff71ae3 slirp: Adding IPv6, ICMPv6 Echo and NDP autoconfiguration
This patch adds the functions needed to handle IPv6 packets. ICMPv6 and
NDP headers are implemented.

Slirp is now able to send NDP Router or Neighbor Advertisement when it
receives Router or Neighbor Solicitation. Using a 64bit-sized IPv6
prefix, the guest is now able to perform stateless autoconfiguration
(SLAAC) and to compute its IPv6 address.

This patch adds an ndp_table, mainly inspired by arp_table, to keep an
NDP cache and manage network address resolution.
Slirp regularly sends NDP Neighbor Advertisement, as recommended by the
RFC, to make the guest refresh its route.

This also adds ip6_cksum() to compute ICMPv6 checksums using IPv6
pseudo-header.

Some #define ETH_* are moved upper in slirp.h to make them accessible to
other slirp/*.h

Signed-off-by: Guillaume Subiron <maethor@subiron.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-03-15 10:35:00 +01:00
Peter Maydell
1a8b408168 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Mon 14 Mar 2016 16:36:52 GMT using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (40 commits)
  iotests: Add test for QMP event rates
  monitor: Use QEMU_CLOCK_VIRTUAL for the event queue in qtest mode
  monitor: Separate QUORUM_REPORT_BAD events according to the node name
  quorum: Fix crash in quorum_aio_cb()
  iotests: Correct 081's reference output
  block: Remove unused typedef of BlockDriverDirtyHandler
  block: Move block dirty bitmap code to separate files
  typedefs: Add BdrvDirtyBitmap
  block: Include hbitmap.h in block.h
  backup: Use Bitmap to replace "s->bitmap"
  vpc: Use BB functions in .bdrv_create()
  vmdk: Use BB functions in .bdrv_create()
  vhdx: Use BB functions in .bdrv_create()
  vdi: Use BB functions in .bdrv_create()
  sheepdog: Use BB functions in .bdrv_create()
  qed: Use BB functions in .bdrv_create()
  qcow2: Use BB functions in .bdrv_create()
  qcow: Use BB functions in .bdrv_create()
  parallels: Use BB functions in .bdrv_create()
  block: Introduce blk_set_allow_write_beyond_eof()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-15 09:13:06 +00:00
Lan Tianyu
492a4c94be kvm: Remove x2apic feature from CPU model when kernel_irqchip is off
x2apic feature is in the kvm_default_props and automatically added to all
CPU models when KVM is enabled. But userspace devices don't support x2apic
which can't be enabled without the in-kernel irqchip. It will trigger
warning of "host doesn't support requested feature: CPUID.01H:ECX.x2apic
[bit 21]" when kernel_irqchip is off. This patch is to fix it via removing
x2apic feature when kernel_irqchip is off.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-03-14 17:26:06 -03:00
Denis V. Lunev
4467c6c118 hyperv: cpu hotplug fix with HyperV enabled
With Hyper-V enabled CPU hotplug stops working. The CPU appears
in device manager on Windows but does not appear in peformance
monitor and control panel.

The root of the problem is the following. Windows checks
HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE bit in CPUID. The
presence of this bit is enough to cure the situation.

The bit should be set when CPU hotplug is allowed for HyperV VM.
The check that hot_add_cpu callback is defined is enough from the
protocol point of view. Though this callback is defined almost
always thus there is no need to export that knowledge in the
other way.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Richard Henderson <rth@twiddle.net>
CC: Eduardo Habkost <ehabkost@redhat.com>
CC: "Andreas Färber" <afaerber@suse.de>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-03-14 17:26:06 -03:00
Richard Henderson
b9f9c5b41a target-i386: Dump unknown opcodes with -d unimp
We discriminate here between opcodes that are illegal in the current
cpu mode or with illegal arguments (such as modrm.mod == 3) and
encodings that are unknown (such as an unimplemented isa extension).

Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-03-14 10:53:07 -07:00
Richard Henderson
f083d92c03 target-i386: Fix inhibit irq mask handling
The patch in 7f0b714 was too simplistic, in that we wound up setting
the flag and then resetting it immediately in gen_eob.

Fixes the reported boot problem with Windows XP.

Reported-by: Hervé Poussineau <hpoussin@reactos.org>
Tested-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-03-14 10:53:02 -07:00
Richard Henderson
26317698ef target-i386: Use gen_nop_modrm for prefetch instructions
Tested-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-03-14 10:52:56 -07:00
Paolo Bonzini
e2e02a8207 target-i386: Fix addr16 prefix
While ADDSEG will only be false in 16-bit mode for LEA, it can be
false even in other cases when 16-bit addresses are obtained via
the 67h prefix in 32-bit mode.  In this case, gen_lea_v_seg forgets
to add a nonzero FS or GS base if CS/DS/ES/SS are all zero.  This
case is pretty rare but happens when booting Windows 95/98, and
this patch fixes it.

The bug is visible since commit d6a291498, but it was introduced
together with gen_lea_v_seg and it probably could be reproduced
with a "addr16 gs movsb" instruction as early as in commit
ca2f29f555.

Reported-by: Hervé Poussineau <hpoussin@reactos.org>
Tested-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1456931078-21635-1-git-send-email-pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-03-14 10:52:48 -07:00
Richard Henderson
a657f79e32 target-i386: Fix SMSW for 64-bit mode
In non-64-bit modes, the instruction always stores 16 bits.
But in 64-bit mode, when the destination is a register, the
instruction can write 32 or 64 bits.

Tested-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-03-14 10:52:42 -07:00
Paolo Bonzini
880f848650 target-i386: Fix SMSW and LMSW from/to register
SMSW and LMSW accept register operands, but commit 1906b2a ("target-i386:
Rearrange processing of 0F 01", 2016-02-13) did not account for that.

Fixes: 1906b2af7c
Reported-by: Hervé Poussineau <hpoussin@reactos.org>
Tested-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1456845134-18812-1-git-send-email-pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-03-14 10:52:29 -07:00
Paolo Bonzini
8b33e82b86 target-i386: Avoid repeated calls to the bnd_jmp helper
Two flags were tested the wrong way.

Tested-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1456845145-18891-1-git-send-email-pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
[rth: Fixed enable test as well.]
2016-03-14 10:45:41 -07:00
Kevin Wolf
0d611402a1 Merge remote-tracking branch 'mreitz/tags/pull-block-for-kevin-2016-03-14-v2' into queue-block
Block patches for pi day, v2.

# gpg: Signature made Mon Mar 14 17:35:29 2016 CET using RSA key ID E838ACAD
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"

* mreitz/tags/pull-block-for-kevin-2016-03-14-v2:
  iotests: Add test for QMP event rates
  monitor: Use QEMU_CLOCK_VIRTUAL for the event queue in qtest mode
  monitor: Separate QUORUM_REPORT_BAD events according to the node name
  quorum: Fix crash in quorum_aio_cb()
  iotests: Correct 081's reference output
  block: Remove unused typedef of BlockDriverDirtyHandler
  block: Move block dirty bitmap code to separate files
  typedefs: Add BdrvDirtyBitmap
  block: Include hbitmap.h in block.h
  backup: Use Bitmap to replace "s->bitmap"

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 17:36:31 +01:00
Alberto Garcia
7223c48cff iotests: Add test for QMP event rates
This test verifies that the rate-limited QMP events are emitted at a
maximum rate of 1 per second as defined in monitor_qapi_event_conf in
monitor.c

It also checks that QUORUM_REPORT_BAD events generated from different
nodes are kept in separate queues so they don't mask each other.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 0dbd3ee88a59a6363042ad81cfb345037bfbf612.1457610443.git.berto@igalia.com
[mreitz@redhat.com: Renamed test from 146 to 148]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:06 +01:00
Alberto Garcia
dc59997871 monitor: Use QEMU_CLOCK_VIRTUAL for the event queue in qtest mode
This allows us to perform tests on the monitor queues to verify that
the rate limits are enforced.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: dde511809e954a5c32d5b648bb184c03c89ed5d5.1457610443.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:06 +01:00
Alberto Garcia
6d425eb94d monitor: Separate QUORUM_REPORT_BAD events according to the node name
The QUORUM_REPORT_BAD event is emitted whenever there's an I/O error
in a child of a Quorum device. This event is emitted at a maximum rate
of 1 per second. This means that an error in one of the children will
mask errors in the other children if they happen within the same 1
second interval.

This patch modifies qapi_event_throttle_equal() so QUORUM_REPORT_BAD
events are kept separately if they come from different children.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: b989c0cb3755bc4b6696e796fa8ed2ef6c56606a.1457610443.git.berto@igalia.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:06 +01:00
Alberto Garcia
b9c600d207 quorum: Fix crash in quorum_aio_cb()
quorum_aio_cb() emits the QUORUM_REPORT_BAD event if there's
an I/O error in a Quorum child. However sacb->aiocb must be
correctly initialized for this to happen. read_quorum_children() and
read_fifo_child() are not doing this, which results in a QEMU crash.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 8138570d071ba7e25db3736979234a1fd71dbd05.1457610443.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:06 +01:00
Max Reitz
e3f66e0368 iotests: Correct 081's reference output
The newly added type parameter for the QUORUM_REPORT_BAD event changed
the output of iotest 081, so the reference should be amended
accordingly.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 1457705687-27122-1-git-send-email-mreitz@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
2016-03-14 17:35:06 +01:00
Fam Zheng
fcce736719 block: Remove unused typedef of BlockDriverDirtyHandler
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1457412306-18940-6-git-send-email-famz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:05 +01:00
Fam Zheng
ebab225910 block: Move block dirty bitmap code to separate files
The only code change is making bdrv_dirty_bitmap_truncate public. It is
used in block.c.

Also two long lines (bdrv_get_dirty) are wrapped.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1457412306-18940-5-git-send-email-famz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:05 +01:00
Fam Zheng
9a3f5cf1bf typedefs: Add BdrvDirtyBitmap
Following patches to refactor and move block dirty bitmap code could use
this.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1457412306-18940-4-git-send-email-famz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:05 +01:00
Fam Zheng
78f9dc859d block: Include hbitmap.h in block.h
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1457412306-18940-3-git-send-email-famz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:05 +01:00
Fam Zheng
b2f56462d5 backup: Use Bitmap to replace "s->bitmap"
"s->bitmap" tracks done sectors, we only check bit states without using any
iterator which HBitmap is good for. Switch to "Bitmap" which is simpler and
more memory efficient.

Meanwhile, rename it to done_bitmap, to reflect the intention.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1457412306-18940-2-git-send-email-famz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-03-14 17:35:05 +01:00
Peter Maydell
618a5a8bc5 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Mon 14 Mar 2016 11:27:01 GMT using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/tracing-pull-request:
  trace: separate MMIO tracepoints from TB-access tracepoints
  trace: include CPU index in trace_memory_region_*()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-14 16:22:17 +00:00
Kevin Wolf
b8f45cdf78 vpc: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:44 +01:00
Kevin Wolf
c4bea1690e vmdk: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
10bf03af12 vhdx: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
a08f0c3b5f vdi: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
fba98d455a sheepdog: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
8a56fdadaf qed: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
23588797b6 qcow2: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
6af4016020 qcow: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
8942764f54 parallels: Use BB functions in .bdrv_create()
All users of the block layers are supposed to go through a BlockBackend.
The .bdrv_create() implementation is one such user, so this patch
converts it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
c10c9d9615 block: Introduce blk_set_allow_write_beyond_eof()
We check that the guest can't write beyond the end of its disk, but for
other internal users it can make sense to allow growing a file.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
6340472c54 block: Use writeback in .bdrv_create() implementations
There's no reason to use a writethrough cache mode while creating an
image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
2073d410ce hmp: Extend drive_del to delete nodes without BB
Now that we can use drive_add to create new nodes without a BB, we also
want to be able to delete such nodes again.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
abb21ac3e6 hmp: 'drive_add -n' for creating a node without BB
This patch adds an option to the drive_add HMP command to create only a
BlockDriverState without a BlockBackend on top.

The motivation for this is that libvirt needs to specify options to a
migration target (specifically, detect-zeroes). drive-mirror doesn't
allow specifying options, and the proper way to do this is to create the
target BDS separately with blockdev-add (where you can specify options)
and then use blockdev-mirror to that BDS.

However, libvirt can't use blockdev-add as long as it is still
experimental, and we're expecting that it will still take some time, so
we need to resort to drive_add.

The problem with drive_add is that so far it always created a BB, and
BDSes with a BB can't be used as a mirroring target as long as we don't
support multiple BBs per BDS - and while we're working towards that
goal, it's another thing that will still take some time.

So to achieve the goal, the simplest solution to provide the
functionality now without adding one-off options to the mirror QMP
commands is to extend drive_add to create nodes without BBs.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Fam Zheng
71968dbfd8 vmdk: Switch to heap arrays for vmdk_parent_open
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Fam Zheng
5997c210b9 vmdk: Switch to heap arrays for vmdk_read_cid
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Fam Zheng
965415eb20 vmdk: Switch to heap arrays for vmdk_write_cid
It is only called once for each opened image, so we can do it the easy
way.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
a81d616437 block: Fix cache mode defaults in bds_tree_init()
Without setting explicit defaults in the options, blockdev-add without
an ID ended up defaulting to writethrough. It should be writeback as
documented.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
73176bee99 block: Fix snapshot=on cache modes
Since commit 91a097e, we end up with a somewhat weird cache mode
configuration with snapshot=on: The commit broke the cache mode
inheritance for the snapshot overlay so that it is opened as
writethrough instead of unsafe now. The following bdrv_append() call to
put it on top of the tree swaps the WCE flag with the snapshot's backing
file (i.e. the originally given file), so what we eventually get is
cache=writeback on the temporary overlay and
cache=writethrough,cache.no-flush=on on the real image file.

This patch changes things so that the temporary overlay gets
cache=unsafe again like it used to, and the real images get whatever the
user specified. This means that cache.direct is now respected even with
snapshot=on, and in the case of committing changes, the final flush is
no longer ignored except explicitly requested by the user.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-03-14 16:46:43 +01:00
Kevin Wolf
f86b8b584b blockdev: Snapshotting must not open second instance of old top
Calling bdrv_img_create() with a size of -1 means that it determines the
size automatically by opening the backing file. However, in the case of
live snapshots, the backing file is already opened and we must avoid
opening the same image twice at the same time. Apart from that, just
getting the size from the already existing BDS is a lot less overhead
than opening a new instance.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
2016-03-14 16:46:43 +01:00
Changlong Xie
924e8a2bbc quorum: modify vote rules for flush operation
Keep flush interface the same logic as quorum read/write, Otherwise in
following scenario, we'll encounter unexpected errors.

Quorum has two children(A, B). A do flush sucessfully, but B flush failed.
This cause the filesystem of guest become read-only with following errors:

end_request: I/O error, dev vda, sector 11159960
Aborting journal on device vda3-8
EXT4-fs error (device vda3): ext4_journal_start_sb:327: Detected abort journal
EXT4-fs (vda3): Remounting filesystem read-only

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Changlong Xie
0ae053b7e1 qmp event: Refactor QUORUM_REPORT_BAD
Introduce QuorumOpType, and make QUORUM_REPORT_BAD compatible
with it.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Changlong Xie
58346b82ed docs: fix invalid node name in qmp event
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:43 +01:00
Jeff Cody
1001dd9f84 block/vpc: add tests for image creation force_size parameter
Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:42 +01:00
Jeff Cody
fb9245c261 block/vpc: give option to force the current_size field in .bdrv_create
When QEMU creates a VHD image, it goes by the original spec,
calculating the current_size based on the nearest CHS geometry (with an
exception for disks > 127GB).

Apparently, Azure will only allow images that are sized to the nearest
MB, and the current_size as calculated from CHS cannot guarantee that.

Allow QEMU to create images similar to how Hyper-V creates images, by
setting current_size to the specified virtual disk size.  This
introduces an option, force_size, to be passed to the vpc format during
image creation, e.g.:

    qemu-img convert -f raw -o force_size -O vpc test.img test.vhd

When using the "force_size" option, the creator app field used by
QEMU will be "qem2" instead of "qemu", to indicate the difference.
In light of this, we also add parsing of the "qem2" field during
vpc_open.

Bug reference: https://bugs.launchpad.net/qemu/+bug/1490611

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:42 +01:00
Jeff Cody
798609bbe2 block/vpc: tests for auto-detecting VPC and Hyper-V VHD images
This tests auto-detection, and overrides, of VHD image sizes created
by Virtual PC, Hyper-V, and Disk2vhd.

This adds three sample images:

hyperv2012r2-dynamic.vhd.bz2 - dynamic VHD image created with Hyper-V
virtualpc-dynamic.vhd.bz2    - dynamic VHD image created with Virtual PC
d2v-zerofilled.vhd.bz2       - dynamic VHD image created with Disk2vhd

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:42 +01:00
Jeff Cody
c540d53ac8 block/vpc: choose size calculation method based on creator_app field
The VHD file format is used by both Virtual PC, and Hyper-V.  However,
how the virtual disk size is calculated varies between the two.

Virtual PC uses the CHS drive parameters to determine the drive size.
Hyper-V, on the other hand, uses the current_size field in the footer
when determining image size.

This is problematic for a few reasons:

* VHD images from Hyper-V, using CHS calculations, will likely be
  trunctated.

* If we just rely always on current_size, then QEMU may have data
  compatibility issues with Virtual PC (we may write too much data
  into a VHD file to be used by Virtual PC, for instance).

* Existing VHD images created by QEMU have used the CHS calculations,
  except for images exceeding the 127GB limit.  We want to remain
  compatible with our own generated images.

Luckily, the VHD specification defines a 'Creator App' field, that is
used to indicate what software created the VHD file.

This patch does two things:

    1. Uses the 'Creator App' field to help determine how to calculate
       size, and

    2. Adds a VPC format option 'force_size_calc', so that the user can
       override the 'Creator App' auto-detection, in case there exist
       VHD images with unknown or contradictory 'Creator App' entries.

N.B.: We currently use the maximum CHS value as an indication to use the
current_size field.  This patch does not change that, even with the
'force_size_calc' option.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:42 +01:00
Kevin Wolf
c21cc6ca98 block/qapi: Include empty drives in query-blockstats
Since commit 5ec18f8c, query-blockstats didn't return the statistics of
drives without media any more because such drives have only a BB now,
but not a BDS any more.

This patch fixes the regression so that query-blockstats iterates over
BBs by default and empty drives are displayed again.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-03-14 16:46:42 +01:00
Kevin Wolf
b07363a1a3 block/qapi: Factor out bdrv_query_bds_stats()
The new functions handles the data that is taken from the
BlockDriverState.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-03-14 16:46:42 +01:00
Kevin Wolf
2b77e60ab8 block/qapi: Factor out bdrv_query_blk_stats()
The new functions handles the data that is taken from the BlockBackend.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-03-14 16:46:42 +01:00
Paolo Bonzini
396374caea qemu-img: eliminate memory leak
Not particularly important since qemu-img exits immediately after
calling img_rebase, but easily fixed.  Coverity says thanks.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-03-14 16:46:42 +01:00
Peter Maydell
6dcea61425 Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20160311.0' into staging
VFIO updates 2016-03-11

 - Allow devices to be specified via sysfs path (Alex Williamson)
 - vfio region helpers and generalization for future device specific regions
   (Alex Williamson)
 - Automatic ROM device ID and checksum fixup (Alex Williamson)
 - Split VGA setup to allow enabling VGA from quirks (Alex Williamson)
 - Remove fixed string limit for ROM MemoryRegion name (Neo Jia)
 - MAINTAINERS update (Thomas Huth)

# gpg: Signature made Fri 11 Mar 2016 15:55:31 GMT using RSA key ID 3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"

* remotes/awilliam/tags/vfio-update-20160311.0:
  MAINTAINERS: Add entry for the include/hw/vfio/ folder
  vfio/pci: replace fixed string limit by g_strdup_printf
  vfio/pci: Split out VGA setup
  vfio/pci: Fixup PCI option ROMs
  vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions
  vfio: Generalize region support
  vfio: Wrap VFIO_DEVICE_GET_REGION_INFO
  vfio: Add sysfsdev property for pci & platform

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-14 15:11:39 +00:00
Peter Maydell
0dcee62261 Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.6-7' into staging
migration:
 - postcopy is no longer experimental
 - fix a use-after-free in postcopy
 - fix a compile warning

# gpg: Signature made Fri 11 Mar 2016 12:29:33 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"

* remotes/amit-migration/tags/migration-for-2.6-7:
  postcopy: Remove the x-
  postcopy: listen thread is never joined
  migration: fix use-after-free in loadvm_postcopy_handle_run_bh
  migration: fix warning for source_return_path_thread

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-14 13:51:21 +00:00
Peter Maydell
8326ec2c83 Merge remote-tracking branch 'remotes/berrange/tags/pull-io-win32-2016-03-11-1' into staging
Merge I/O fixes for win32

# gpg: Signature made Fri 11 Mar 2016 10:03:20 GMT using RSA key ID 15104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"

* remotes/berrange/tags/pull-io-win32-2016-03-11-1:
  osdep: remove use of socket_error() from all code
  osdep: add wrappers for socket functions
  char: remove qemu_chr_open_socket_fd method
  char: remove socket_try_connect method
  char: remove qemu_chr_finish_socket_connection method
  io: implement socket watch for win32 using WSAEventSelect+select
  io: remove checking of EWOULDBLOCK
  io: use qemu_accept to ensure SOCK_CLOEXEC is set
  io: introduce qio_channel_create_socket_watch
  io: pass HANDLE to g_source_add_poll on Win32
  io: fix copy+paste mistake in socket error message
  io: assert errors before asserting content in I/O test
  io: set correct error object in background reader test thread
  io: wait for incoming client in socket test
  io: bind to socket before creating QIOChannelSocket
  io: initialize sockets in test program
  io: use bind() to check for IPv4/6 availability
  osdep: fix socket_error() to work with Mingw64

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-14 11:49:33 +00:00
Peter Maydell
d1ab9681ac Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160311' into staging
CPU hotplug via cpu-add for s390x, cleanup of the s390x machine
compat code and a bugfix in the s390-ccw bios.

# gpg: Signature made Fri 11 Mar 2016 09:48:02 GMT using RSA key ID C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"

* remotes/cohuck/tags/s390x-20160311:
  s390x/cpu: use g_new0
  s390x: Introduce S390MachineClass
  s390x: Introduce machine definition macros
  pc-bios/s390-ccw: fix old bug in ptr increment
  s390x/cpu: Allow hotplug of CPUs
  s390x/cpu: Add error handling to cpu creation
  s390x/cpu: Add CPU property links
  s390x/cpu: Tolerate max_cpus
  s390x/cpu: Get rid of side effects when creating a vcpu
  s390x/cpu: Set initial CPU state in common routine
  s390x/cpu: Cleanup init in preparation for hotplug

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-14 11:13:11 +00:00
Hollis Blanchard
f2d089425d trace: separate MMIO tracepoints from TB-access tracepoints
Memory accesses to code which has previously been translated into a TB show up
in the MMIO path, so that they may invalidate the TB. It's extremely confusing
to mix those in with device MMIOs, so split them into their own tracepoint.

Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1456949575-1633-2-git-send-email-hollis_blanchard@mentor.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-14 09:34:30 +00:00
Hollis Blanchard
5a68be94ac trace: include CPU index in trace_memory_region_*()
Knowing which CPU performed an action is essential for understanding SMP guest
behavior.

However, cpu_physical_memory_rw() may be executed by a machine init function,
before any VCPUs are running, when there is no CPU running ('current_cpu' is
NULL). In this case, store -1 in the trace record as the CPU index. Trace
analysis tools may need to be aware of this special case.

Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
Message-id: 1456949575-1633-1-git-send-email-hollis_blanchard@mentor.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-14 09:34:30 +00:00
Cédric Le Goater
5167560b03 ipmi: add some local variables in ipmi_sdr_init
This patch adds a couple of variables to manipulate the raw sdr
entries. The const attribute is also removed on init_sdrs. This will
ease the introduction of a sdr loader using a file.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Cédric Le Goater
52fc01d973 ipmi: remove the need of an ending record in the SDR table
Currently, the code initializing the sdr table relies on an ending
record with a recid of 0xffff. This patch changes the loop to use the
sdr size as a breaking condition.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Cédric Le Goater
4fa9f08e96 ipmi: use a function to initialize the SDR table
This patch moves the code section initializing the sdrs in its own
routine to prepare ground for changes in the subsequent patches.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Cédric Le Goater
0bc6001f0d ipmi: add a realize function to the device class
This will be useful to define and use properties when the object is
instantiated.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Cédric Le Goater
6acb971a94 ipmi: add rsp_buffer_set_error() helper
The third byte in the response buffer of an IPMI command holds the
error code. In many IPMI command handlers, this byte is updated
directly. This patch adds a helper routine to clarify why this byte is
being used.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Cédric Le Goater
7f996411ad ipmi: remove IPMI_CHECK_RESERVATION() macro
Some IPMI command handlers in the BMC simulator use a macro
IPMI_CHECK_RESERVATION() to check a SDR reservation but the macro
implicitly uses local variables. This patch simply removes it.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Cédric Le Goater
a580d82085 ipmi: replace IPMI_ADD_RSP_DATA() macro with inline helpers
The IPMI command handlers in the BMC simulator use a macro
IPMI_ADD_RSP_DATA() to push bytes in a response buffer. The macro
hides the fact that it implicitly uses variables local to the handler,
which is misleading.

This patch introduces a simple 'struct RspBuffer' and inlined helper
routines to store byte(s) in a response buffer. rsp_buffer_push()
replaces the macro IPMI_ADD_RSP_DATA() and rsp_buffer_pushmore() is
new helper to push multiple bytes. The latest is used in the command
handlers get_msg() and get_sdr() which are manipulating the buffer
directly.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Cédric Le Goater
4f298a4b29 ipmi: remove IPMI_CHECK_CMD_LEN() macro
Most IPMI command handlers in the BMC simulator start with a call to
the macro IPMI_CHECK_CMD_LEN() which verifies that a minimal number of
arguments expected by the command are indeed available. To achieve
this task, the macro implicitly uses local variables which is
misleading in the code.

This patch adds a 'cmd_len_min' attribute to the struct IPMICmdHandler
defining the minimal number of arguments expected by the command and
moves this check in the global command handler ipmi_sim_handle_command().

To clarify the checks being done on the received command, the patch
introduces a helper ipmi_get_handler().

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
Acked-by: Corey Minyard <cminyard@mvista.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Michael S. Tsirkin
5da4fb0018 MAINTAINERS: machine core
Marcel and Eduardo agreed to co-maintain these.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Thomas Huth
494f7b572e MAINTAINERS: Add an entry for virtio header files
Files in the include/hw/virtio/ folder should be included in the
"virtio" sections of the MAINTAINERS file.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:13 +02:00
Igor Mammedov
ed2ef10c0c pc: acpi: clarify why possible LAPIC entries must be present in MADT
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
adcb89d55d pc: acpi: drop cpu->found_cpus bitmap
cpu->found_cpus bitmap is used for setting present
flag in CPON AML package. But it takes a bunch of code
to fill bitmap and could be simplified by getting
presense info from possible CPUs list directly.

So drop cpu->found_cpus bitmap and unroll possible
CPUs list into APIC index array at the place where
CPUON AML package is created.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
2adba0a18a pc: acpi: create Processor and Notify objects only for valid lapics
do not assume that all lapics in range 0..apic_id_limit
are valid and do not create Processor and Notify objects
for not possible lapics.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
907e7c94d1 pc: acpi: create MADT.lapic entries only for valid lapics
do not assume that all lapics in range 0..apic_id_limit
are valid and do not create lapic entries for not
possible lapics in MADT.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
5803fce389 pc: acpi: SRAT: create only valid processor lapic entries
When APIC IDs are sparse*, in addition to valid LAPIC
entries the SRAT is also filled invalid ones for non
possible APIC IDs.
Fix it by asking machine for all possible APIC IDs
instead of wrongly assuming that all APIC IDs in
range 0..apic_id_limit are possible.

* sparse lapic topology CLI:
     -smp x,sockets=2,cores=3,maxcpus=6
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
3d3ebcad6a pc: acpi: cleanup qdev_get_machine() calls
cache qdev_get_machine() result in acpi_setup/acpi_build_update
time and pass it as an argument to child functions that need it.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
3811ef14f5 machine: introduce MachineClass.possible_cpu_arch_ids() hook
on x86 currently range 0..max_cpus is used to generate
architecture-dependent CPU ID (APIC Id) for each present
and possible CPUs. However architecture-dependent CPU IDs
list could be sparse and code that needs to enumerate
all IDs (ACPI) ended up doing guess work enumerating all
possible and impossible IDs up to
  apic_id_limit = x86_cpu_apic_id_from_index(max_cpus).

That leads to creation of MADT entries and Processor
objects in ACPI tables for not possible CPUs.
Fix it by allowing board specify a concrete list of
CPU IDs accourding its own rules (which for x86 depends
on topology). So that code that needs this list could
request it from board instead of trying to guess
what IDs are correct on its own.

This interface will also allow to help making AML
part of CPU hotplug target independent so it could
be reused for ARM target.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
ebde2465a9 pc: init pcms->apic_id_limit once and use it throughout pc.c
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-03-11 16:59:12 +02:00
Igor Mammedov
ae29883508 pc: acpi: remove NOP assignment
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Cao jin
f9735fd53f pxb: cleanup
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-03-11 16:59:12 +02:00
Marc-André Lureau
342f7a9d05 qemu-char: make tcp_chr_disconnect() reentrant-safe
During CHR_EVENT_CLOSED, the function could be reentered, make this
case safe.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Marc-André Lureau
6167ebbd91 qemu-char: remove all msgfds on disconnect
Disconnect should reset context.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Marc-André Lureau
869a58af86 qemu-char: avoid potential double-free
If tcp_set_msgfds() is called several time with NULL fds, this
could lead to double-free.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Marc-André Lureau
b7fcb3603c vhost-user: remove useless is_server field
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Marc-André Lureau
c1bf3531ae vhost-user: fix use after free
"name" is freed after visiting options, instead use the first NetClientState
name. Adds a few assert() for clarifying and checking some impossible states.

READ of size 1 at 0x602000000990 thread T0
    #0 0x7f6b251c570c  (/lib64/libasan.so.2+0x4770c)
    #1 0x5566dc380600 in qemu_find_net_clients_except net/net.c:824
    #2 0x5566dc39bac7 in net_vhost_user_event net/vhost-user.c:193
    #3 0x5566dbee862a in qemu_chr_be_event /home/elmarco/src/qemu/qemu-char.c:201
    #4 0x5566dbef2890 in tcp_chr_disconnect /home/elmarco/src/qemu/qemu-char.c:2790
    #5 0x5566dbef2d0b in tcp_chr_sync_read /home/elmarco/src/qemu/qemu-char.c:2835
    #6 0x5566dbee8a99 in qemu_chr_fe_read_all /home/elmarco/src/qemu/qemu-char.c:295
    #7 0x5566dc39b964 in net_vhost_user_watch net/vhost-user.c:180
    #8 0x5566dc5a06c7 in qio_channel_fd_source_dispatch io/channel-watch.c:70
    #9 0x7f6b1aa2ab87 in g_main_dispatch /home/elmarco/src/gnome/glib/glib/gmain.c:3154
    #10 0x7f6b1aa2b9cb in g_main_context_dispatch /home/elmarco/src/gnome/glib/glib/gmain.c:3769
    #11 0x5566dc475ed4 in glib_pollfds_poll /home/elmarco/src/qemu/main-loop.c:212
    #12 0x5566dc476029 in os_host_main_loop_wait /home/elmarco/src/qemu/main-loop.c:257
    #13 0x5566dc476165 in main_loop_wait /home/elmarco/src/qemu/main-loop.c:505
    #14 0x5566dbf08d31 in main_loop /home/elmarco/src/qemu/vl.c:1932
    #15 0x5566dbf16783 in main /home/elmarco/src/qemu/vl.c:4646
    #16 0x7f6b180bb57f in __libc_start_main (/lib64/libc.so.6+0x2057f)
    #17 0x5566dbbf5348 in _start (/home/elmarco/src/qemu/x86_64-softmmu/qemu-system-x86_64+0x3f9348)

0x602000000990 is located 0 bytes inside of 5-byte region [0x602000000990,0x602000000995)
freed by thread T0 here:
    #0 0x7f6b2521666a in __interceptor_free (/lib64/libasan.so.2+0x9866a)
    #1 0x7f6b1aa332a4 in g_free /home/elmarco/src/gnome/glib/glib/gmem.c:189
    #2 0x5566dc5f416f in qapi_dealloc_type_str qapi/qapi-dealloc-visitor.c:134
    #3 0x5566dc5f3268 in visit_type_str qapi/qapi-visit-core.c:196
    #4 0x5566dc5ced58 in visit_type_Netdev_fields /home/elmarco/src/qemu/qapi-visit.c:5936
    #5 0x5566dc5cef71 in visit_type_Netdev /home/elmarco/src/qemu/qapi-visit.c:5960
    #6 0x5566dc381a8d in net_visit net/net.c:1049
    #7 0x5566dc381c37 in net_client_init net/net.c:1076
    #8 0x5566dc3839e2 in net_init_netdev net/net.c:1473
    #9 0x5566dc63cc0a in qemu_opts_foreach util/qemu-option.c:1112
    #10 0x5566dc383b36 in net_init_clients net/net.c:1499
    #11 0x5566dbf15d86 in main /home/elmarco/src/qemu/vl.c:4397
    #12 0x7f6b180bb57f in __libc_start_main (/lib64/libc.so.6+0x2057f)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:12 +02:00
Xiao Guangrong
f7df22de56 nvdimm acpi: emulate dsm method
Emulate dsm method after IO VM-exit

Currently, we only introduce the framework and no function is actually
supported

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:11 +02:00
Xiao Guangrong
18c440e1e1 nvdimm acpi: let qemu handle _DSM method
If dsm memory is successfully patched, we let qemu fully emulate
the dsm method

This patch saves _DSM input parameters into dsm memory, tell dsm
memory address to QEMU, then fetch the result from the dsm memory

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:11 +02:00
Xiao Guangrong
b99514135b nvdimm acpi: introduce patched dsm memory
The dsm memory is used to save the input parameters and store
the dsm result which is filled by QEMU.

The address of dsm memory is decided by bios and patched into
int32 object named "MEMA"

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:11 +02:00
Xiao Guangrong
5fe79386ba nvdimm acpi: initialize the resource used by NVDIMM ACPI
32 bits IO port starting from 0x0a18 in guest is reserved for NVDIMM
ACPI emulation. The table, NVDIMM_DSM_MEM_FILE, will be patched into
NVDIMM ACPI binary code

OSPM uses this port to tell QEMU the final address of the DSM memory
and notify QEMU to emulate the DSM method

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:11 +02:00
Gerd Hoffmann
b63283d7c3 pci-ids: add virtio 1.0 ids to spec
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:11 +02:00
Michael S. Tsirkin
2c02a48e6d acpi-test-data: add _DIS methods
commit c82f503dd5
("hw/acpi: fix Q35 support for legacy Windows OS")
added _DIS for all link devices.

Update expected test files accordingly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:59:11 +02:00
Marcel Apfelbaum
c82f503dd5 hw/acpi: fix Q35 support for legacy Windows OS
Legacy Windows operating systems like Windows XP and Windows 2003
require _DIS method to be present for all interrupt links.

PC machines already have a no-op implemented for GSI links, add
it also in Q35.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2016-03-11 16:45:21 +02:00
Cao jin
7335a95abd ich9lpc: fix typo
change some "rbca" to "rcrb"(root complex register block) while
the other to "rcba"(root complex base address).
Bonus: add more comments and fix some indentation.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:45:21 +02:00
Michael S. Tsirkin
226419d615 msi_supported -> msi_nonbroken
Rename controller flag to make it clearer what it means.
Add some documentation as well.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:45:21 +02:00
Gerd Hoffmann
75fd6f13af virtio-pci: call pci reset variant when guest requests reset.
Actually fixes linux not finding virtio 1.0 device virtqueues after
reboot.  Which is new I think, any chance linux kernel virtio code
became more strict in 4.3?

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Fam Zheng <famz@redhat.com>
2016-03-11 16:45:21 +02:00
Michael S. Tsirkin
79248c22ad i386: update expected DSDT
DSDT was changed by:
commit 27b9fc54d2 ("i386: populate floppy
drive information in DSDT").

Update expected files accordingly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 16:44:58 +02:00
Roman Kagan
27b9fc54d2 i386: populate floppy drive information in DSDT
On x86-based systems Linux determines the presence and the type of
floppy drives via a query of a CMOS field.  So does SeaBIOS when
populating the return data for int 0x13 function 0x08.

However Windows doesn't do it. Instead, it requests this information
from BIOS via int 0x13/0x08 or through ACPI objects _FDE (Floppy Drive
Enumerate) and _FDI (Floppy Drive Information) of the floppy controller
object.  On UEFI systems only ACPI-based detection is supported.

QEMU doesn't provide those objects in its ACPI tables and as a result
floppy drives are invisible to Windows on UEFI/OVMF.

This patch adds those objects to the floppy controller in DSDT,
populating them with the information from respective QEMU objects.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: John Snow <jsnow@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Kevin O'Connor <kevin@koconnor.net>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:55:15 +02:00
Roman Kagan
e08fde0c5e fdc: add function to determine drive chs limits
When populating ACPI objects for floppy drives one needs to provide the
maximum values for cylinder, sector, and head number the drive supports.

This patch adds a function that iterates through the array of predefined
floppy drive formats and returns the maximum values of c, h, s, out of
those matching the given floppy drive type.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: John Snow <jsnow@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Kevin O'Connor <kevin@koconnor.net>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2016-03-11 14:55:15 +02:00
Roman Kagan
bda055096b i386: expose floppy drive CMOS type
Make it possible to query the CMOS type of a floppy drive outside of the
source file where it's defined.

It will allow to properly populate the corresponding ACPI objects and
thus enable Windows on BIOS-less systems to access the floppy drives.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: John Snow <jsnow@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Kevin O'Connor <kevin@koconnor.net>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:55:15 +02:00
Roman Kagan
9b613f4e40 i386/acpi: make floppy controller object dynamic
Instead of statically declaring the floppy controller in DSDT, with its
_STA method depending on some obscure bit in the parent ISA bridge, add
the object dynamically to DSDT via AML API only when the controller is
present.

The _STA method is no longer necessary and is therefore dropped.  So are
the declarations of the fields indicating whether the contoller is
enabled.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: John Snow <jsnow@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Kevin O'Connor <kevin@koconnor.net>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:55:15 +02:00
Igor Mammedov
c9f4b77ad5 pc-dimm: fix error handling in pc_dimm_check_memdev_is_busy()
If host_memory_backend_get_memory() were to return error and
NULL MemoryRegion, pc_dimm_check_memdev_is_busy() would crash
dereferencing NULL pointer in memory_region_is_mapped().
But if error is set and non NULL MemoryRegion is returned
then error_setg() will fail with "error already set" assertion
in error_setv()

To avoid above issues use typical error handling pattern
for property setters:

Error *local_error = NULL;
...
error_propagate(errp, local_err);

Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:55:15 +02:00
Ilya Maximets
fff4e48ed5 vhost-user: verify that number of queues is less than MAX_QUEUE_NUM
Fix QEMU crash when -netdev vhost-user,queues=n is passed with number
of queues greater than MAX_QUEUE_NUM.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2016-03-11 14:55:15 +02:00
Denis V. Lunev
a0d06486b4 virtio-balloon: add 'available' counter
The patch for the kernel part is in linux-next already:
commit ac88e7c908b920866e529862f2b2f0129b254ab2
    Author: Igor Redko <redkoi@virtuozzo.com>
    Date:   Thu Feb 18 09:23:01 2016 +1100

    virtio_balloon: export 'available' memory to balloon statistics

    Add a new field, VIRTIO_BALLOON_S_AVAIL, to virtio_balloon memory
    statistics protocol, corresponding to 'Available' in /proc/meminfo.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Igor Redko <redkoi@virtuozzo.com>
CC: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:55:15 +02:00
Marcel Apfelbaum
fc1769b758 hw/virtio: group virtio flags into an enum
Minimizes the possibility to assign
the same bit to different features.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2016-03-11 14:54:28 +02:00
Marcel Apfelbaum
631a438755 hw/virtio: fix double use of a virtio flag
Commits 1811e64c and a6df8adf use the same virtio feature bit 4
for different features.

Fix it by using different bits.

Reported-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2016-03-11 14:54:28 +02:00
Ladi Prosek
4eae2a657d balloon: fix segfault and harden the stats queue
The segfault here is triggered by the driver notifying the stats queue
twice after adding a buffer to it. This effectively resets stats_vq_elem
back to NULL and QEMU crashes on the next stats timer tick in
balloon_stats_poll_cb.

This is a regression introduced in 51b19ebe43, although admittedly
the device assumed too much about the stats queue protocol even before
that commit. This commit adds a few more checks and ensures that the one
stats buffer gets deallocated on device reset.

Cc: qemu-stable@nongnu.org
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:54:28 +02:00
Michael S. Tsirkin
f203549108 acpi: add build_append_named_dword, returning an offset in buffer
This is a very limited form of support for runtime patching -
similar in functionality to what we can do with ACPI_EXTRACT
macros in python, but implemented in C.

This is to allow ACPI code direct access to data tables -
which is exactly what DataTableRegion is there for, except
no known windows release so far implements DataTableRegion.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:54:28 +02:00
Xiao Guangrong
3f3009c098 acpi: allow using object as offset for OperationRegion
Extend aml_operation_region() to use object as offset

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:54:28 +02:00
Xiao Guangrong
9815cba502 acpi: add aml_concatenate()
It will be used by nvdimm acpi

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:54:28 +02:00
Xiao Guangrong
39b6dbd8d7 acpi: add aml_create_field()
It will be used by nvdimm acpi

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-11 14:54:27 +02:00
Dr. David Alan Gilbert
32c3db5b26 postcopy: Remove the x-
Postcopy seems to have survived a cycle with only a few fixes,
and Jiri has the current libvirt wired up and working
( https://www.redhat.com/archives/libvir-list/2016-March/msg00080.html )
so remove the experimental tag.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1457690016-9070-3-git-send-email-dgilbert@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-11 17:53:59 +05:30
Dr. David Alan Gilbert
a587a3fe6c postcopy: listen thread is never joined
We don't join the listen thread, it does its own cleanup.
Mark as detached not joinable.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1457690016-9070-2-git-send-email-dgilbert@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-11 17:53:59 +05:30
Denis V. Lunev
8646992279 migration: fix use-after-free in loadvm_postcopy_handle_run_bh
MigrationState is destroyed before we can come into bottom half.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Amit Shah <amit.shah@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1457537708-8622-1-git-send-email-den@openvz.org>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-11 12:58:45 +05:30
Peter Xu
568b01caf3 migration: fix warning for source_return_path_thread
max_len is not necessary, while it brings a warning during compilation
when specify "-Wstack-usage=1000000". Replacing using sizeof().

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1457503932-31763-1-git-send-email-peterx@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-11 12:58:37 +05:30
Thomas Huth
99b88c6d1f MAINTAINERS: Add entry for the include/hw/vfio/ folder
The headers in include/hw/vfio/ should be listed in the VFIO
section of the MAINTAINERS file.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 20:50:44 -07:00
Neo Jia
062ed5d8d6 vfio/pci: replace fixed string limit by g_strdup_printf
A trivial change to remove string limit by using g_strdup_printf

Tested-by: Neo Jia <cjia@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 20:50:43 -07:00
Alex Williamson
e593c0211b vfio/pci: Split out VGA setup
This could be setup later by device specific code, such as IGD
initialization.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 20:50:41 -07:00
Alex Williamson
e2e5ee9c56 vfio/pci: Fixup PCI option ROMs
Devices like Intel graphics are known to not only have bad checksums,
but also the wrong device ID.  This is not so surprising given that
the video BIOS is typically part of the system firmware image rather
that embedded into the device and needs to support any IGD device
installed into the system.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 20:50:39 -07:00
Alex Williamson
2d82f8a3cd vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions
Match common vfio code with setup, exit, and finalize functions for
BAR, quirk, and VGA management.  VGA is also changed to dynamic
allocation to match the other MemoryRegions.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 20:50:38 -07:00
Alex Williamson
db0da029a1 vfio: Generalize region support
Both platform and PCI vfio drivers create a "slow", I/O memory region
with one or more mmap memory regions overlayed when supported by the
device. Generalize this to a set of common helpers in the core that
pulls the region info from vfio, fills the region data, configures
slow mapping, and adds helpers for comleting the mmap, enable/disable,
and teardown.  This can be immediately used by the PCI MSI-X code,
which needs to mmap around the MSI-X vector table.

This also changes VFIORegion.mem to be dynamically allocated because
otherwise we don't know how the caller has allocated VFIORegion and
therefore don't know whether to unreference it to destroy the
MemoryRegion or not.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 20:03:16 -07:00
Daniel P. Berrange
b16a44e13e osdep: remove use of socket_error() from all code
Now that QEMU wraps the Win32 sockets methods to automatically
set errno upon failure, there is no reason for callers to use
the socket_error() method. They can rely on accessing errno
even on Win32. Remove all use of socket_error() from general
code, leaving it as a static method in oslib-win32.c only.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:19:34 +00:00
Daniel P. Berrange
a2d96af4bb osdep: add wrappers for socket functions
The windows socket functions look identical to the normal POSIX
sockets functions, but instead of setting errno, the caller needs
to call WSAGetLastError(). QEMU has tried to deal with this
incompatibility by defining a socket_error() method that callers
must use that abstracts the difference between WSAGetLastError()
and errno.

This approach is somewhat error prone though - many callers of
the sockets functions are just using errno directly because it
is easy to forget the need use a QEMU specific wrapper. It is
not always immediately obvious that a particular function will
in fact call into Windows sockets functions, so the dev may not
even realize they need to use socket_error().

This introduces an alternative approach to portability inspired
by the way GNULIB fixes portability problems. We use a macro to
redefine the original socket function names to refer to a QEMU
wrapper function. The wrapper function calls the original Win32
sockets method and then sets errno from the WSAGetLastError()
value.

Thus all code can simply call the normal POSIX sockets APIs are
have standard errno reporting on error, even on Windows. This
makes the socket_error() method obsolete.

We also bring closesocket & ioctlsocket into this approach. Even
though they are non-standard Win32 names, we can't wrap the normal
close/ioctl methods since there's no reliable way to distinguish
between a file descriptor and HANDLE in Win32.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:19:07 +00:00
Daniel P. Berrange
08b758b482 char: remove qemu_chr_open_socket_fd method
The qemu_chr_open_socket_fd method takes care of either doing a
synchronous socket connect, or creating a listener socket. Part
of the work when creating the listener socket is to register a
watch for incoming clients. The caller of qemu_chr_open_socket_fd
may not want this watch created, as it might be doing a synchronous
wait for the first client. Rather than passing yet more parameters
into qemu_chr_open_socket_fd to let it handle this, just remove
the qemu_chr_open_socket_fd method an inline its functionality
into the caller. This allows for a clearer control flow and shorter
code.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:19:07 +00:00
Daniel P. Berrange
317856cac8 char: remove socket_try_connect method
The qemu_chr_open_socket_fd() method multiplexes three different
actions into one method. The socket_try_connect() method is one
of its callers, but it only ever want one specific action
performed. By inlining that action into socket_try_connect()
we see that there is not in fact any failure scenario, so there
is not even any reason for socket_try_connect to exist. Just
inline the asynchronous connection attempts directly at the
places that need them. This shortens & clarifies the code.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:19:07 +00:00
Daniel P. Berrange
f50dfe457f char: remove qemu_chr_finish_socket_connection method
The qemu_chr_finish_socket_connection method is multiplexing two
different actions into one method. Each caller of it though, only
wants one specific action. The code is shorter & clearer if we
thus remove the method and just inline the specific actions
where needed.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:19:07 +00:00
Paolo Bonzini
a589720567 io: implement socket watch for win32 using WSAEventSelect+select
On Win32 we cannot directly poll on socket handles. Instead we
create a Win32 event object and associate the socket handle with
the event. When the event signals readyness we then have to
use select to determine which events are ready. Creating Win32
events is moderately heavyweight, so we don't want todo it
every time we create a GSource, so this associates a single
event with a QIOChannel.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:19:07 +00:00
Daniel P. Berrange
30fd3e2790 io: remove checking of EWOULDBLOCK
Since we now canonicalize WSAEWOULDBLOCK into EAGAIN there is
no longer any need to explicitly check EWOULDBLOCK for Win32.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:19:05 +00:00
Daniel P. Berrange
de7971ffb9 io: use qemu_accept to ensure SOCK_CLOEXEC is set
The QIOChannelSocket code mistakenly uses the bare accept()
function which does not set SOCK_CLOEXEC.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:11:40 +00:00
Paolo Bonzini
b83b68a013 io: introduce qio_channel_create_socket_watch
Sockets are not in the same namespace as file descriptors on Windows.
As an initial step, introduce separate APIs for file descriptor and
socket watches.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-10 17:10:19 +00:00
Paolo Bonzini
e560d141ab io: pass HANDLE to g_source_add_poll on Win32
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-10 17:10:19 +00:00
Daniel P. Berrange
5151d23e65 io: fix copy+paste mistake in socket error message
s/write/read/ in the error message reported after
readmsg() fails

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:18 +00:00
Daniel P. Berrange
294bbbb425 io: assert errors before asserting content in I/O test
When checking the results of an I/O operation test, assert that
the error objects are NULL before asserting on the content. This
is found to give more useful indication of the problem when
diagnosing test failures.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:18 +00:00
Daniel P. Berrange
256920eb94 io: set correct error object in background reader test thread
The reader thread was accidentally setting the error pointer
intended for the writer thread. If both threads set errors
this would result in QEMU abort'ing due to the error already
being set.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:18 +00:00
Daniel P. Berrange
a9d5aed12d io: wait for incoming client in socket test
Exercise the GSource code for server sockets by calling
qio_channel_wait() prior to accepting the incoming client.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:18 +00:00
Daniel P. Berrange
abc981bf29 io: bind to socket before creating QIOChannelSocket
In the QIOChannelSocket test we create a socket file
descriptor and then try to create a QIOChannelSocket.
This works on Linux, but fails on Win32 because it is
not valid to call getsockname() on an unbound socket.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:18 +00:00
Daniel P. Berrange
5838d66e73 io: initialize sockets in test program
The win32 sockets layer requires that socket_init() is called
otherwise nothing will work.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:18 +00:00
Daniel P. Berrange
0a27af918b io: use bind() to check for IPv4/6 availability
Currently the test-io-channel-socket.c test uses getifaddrs
to see if an IPv4/6 address is present on any host NIC, as
a way to determine if IPv4/6 sockets can be used. This is
problematic because getifaddrs is not available on Win32.

Rather than testing indirectly via getifaddrs, just create
a socket and try to bind() to the loopback address instead.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:18 +00:00
Daniel P. Berrange
c619644067 osdep: fix socket_error() to work with Mingw64
Historically QEMU has had a socket_error() macro that was
defined to map to WSASocketError(). The os-win32.h header
file would define errno constants that mapped to the
WSA error constants. This worked fine with Mingw32 since
its header files never defined any errno values, nor did
it even provide an errno.h.  So callers of socket_error()
could match on traditional Exxxx constants and it would
all "just work".

With Mingw64 though, things work rather differently. First
there is an errno.h file which defines all the traditional
errno constants you'd expect from a UNIX platform. There
is then a winerror.h which defined the WSA error constants.
Crucially the WSAExxxx errno values in winerror.h do not
match the Exxxx errno values in error.h.

If QEMU had only imported winerror.h it would still work,
but the qemu/osdep.h file unconditionally imports errno.h.
So callers of socket_error() will get now WSAExxxx values
back and compare them to the Exxx constants. This will
always fail silently at runtime.

To solve this QEMU needs to stop assuming the WSAExxxx
constant values match the Exxx constant values. Thus the
socket_error() macro is turned into a small function that
re-maps WSAExxxx values into Exxx.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-10 17:10:17 +00:00
Alex Williamson
469002263a vfio: Wrap VFIO_DEVICE_GET_REGION_INFO
In preparation for supporting capability chains on regions, wrap
ioctl(VFIO_DEVICE_GET_REGION_INFO) so we don't duplicate the code for
each caller.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 09:39:07 -07:00
Alex Williamson
7df9381b7a vfio: Add sysfsdev property for pci & platform
vfio-pci currently requires a host= parameter, which comes in the
form of a PCI address in [domain:]<bus:slot.function> notation.  We
expect to find a matching entry in sysfs for that under
/sys/bus/pci/devices/.  vfio-platform takes a similar approach, but
defines the host= parameter to be a string, which can be matched
directly under /sys/bus/platform/devices/.  On the PCI side, we have
some interest in using vfio to expose vGPU devices.  These are not
actual discrete PCI devices, so they don't have a compatible host PCI
bus address or a device link where QEMU wants to look for it.  There's
also really no requirement that vfio can only be used to expose
physical devices, a new vfio bus and iommu driver could expose a
completely emulated device.  To fit within the vfio framework, it
would need a kernel struct device and associated IOMMU group, but
those are easy constraints to manage.

To support such devices, which would include vGPUs, that honor the
VFIO PCI programming API, but are not necessarily backed by a unique
PCI address, add support for specifying any device in sysfs.  The
vfio API already has support for probing the device type to ensure
compatibility with either vfio-pci or vfio-platform.

With this, a vfio-pci device could either be specified as:

-device vfio-pci,host=02:00.0

or

-device vfio-pci,sysfsdev=/sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.0

or even

-device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:02:00.0

When vGPU support comes along, this might look something more like:

-device vfio-pci,sysfsdev=/sys/devices/virtual/intel-vgpu/vgpu0@0000:00:02.0

NB - This is only a made up example path

The same change is made for vfio-platform, specifying sysfsdev has
precedence over the old host option.

Tested-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2016-03-10 09:39:07 -07:00
Cornelia Huck
75cfb3bb41 s390x/cpu: use g_new0
Let's use g_new0 to allocate cpu_states.

Suggested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 12:02:02 +01:00
Janosch Frank
8b8a61ad8c s390x: Introduce S390MachineClass
As we now have the new machine definitions, that let us disable/enable
machine options more easily, we need a way to save them and make them
publicly available.

The new s390-virtio-ccw.h header exports the s390 ccw machine state
and class, so they can be easily used in other C files.

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:16 +01:00
Janosch Frank
4fca654872 s390x: Introduce machine definition macros
Most of the machine definition code looks the same between different
machine versions. The new DEFINE_CCW_MACHINE macro makes defining a
new machine easier by inserting standard machine version
definitions. This also makes it possible to propagate values between
machine versions.

The patch is inspired by code from hw/ppc/spapr.c

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:16 +01:00
Eugene (jno) Dvurechenski
3a3c752f0b pc-bios/s390-ccw: fix old bug in ptr increment
We need to increment by the size of the structure, whereas 'ns' is 'uint8_t *'.

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Eugene (jno) Dvurechenski <jno@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:16 +01:00
Matthew Rosato
a006b67fe4 s390x/cpu: Allow hotplug of CPUs
Implement cpu hotplug routine and add the machine hook.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <1457112875-5209-8-git-send-email-mjrosato@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:15 +01:00
Matthew Rosato
96b1a8bb55 s390x/cpu: Add error handling to cpu creation
Check for and propogate errors during s390 cpu creation.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Message-Id: <1457112875-5209-7-git-send-email-mjrosato@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:15 +01:00
Matthew Rosato
502edbf834 s390x/cpu: Add CPU property links
Link each CPUState as property machine/cpu[n] during initialization.
Add a hotplug handler to s390-virtio-ccw machine and set the
state during plug.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <1457112875-5209-6-git-send-email-mjrosato@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:15 +01:00
Matthew Rosato
25637d31f2 s390x/cpu: Tolerate max_cpus
Once hotplug is enabled, interrupts may come in for CPUs
with an address > smp_cpus.  Allocate for this and allow
search routines to look beyond smp_cpus.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Message-Id: <1457112875-5209-5-git-send-email-mjrosato@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:15 +01:00
Matthew Rosato
c6644fc88b s390x/cpu: Get rid of side effects when creating a vcpu
In preparation for hotplug, defer some CPU initialization
until the device is actually being realized, including
cpu_exec_init.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <1457112875-5209-4-git-send-email-mjrosato@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:15 +01:00
Matthew Rosato
ef3027affc s390x/cpu: Set initial CPU state in common routine
Both initial and hotplugged CPUs need to set the same initial
state.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <1457112875-5209-3-git-send-email-mjrosato@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:15 +01:00
Matthew Rosato
d2eae20790 s390x/cpu: Cleanup init in preparation for hotplug
Ensure a valid cpu_model is set upfront by setting the
default value directly into the MachineState when none is
specified.  This is needed to ensure hotplugged CPUs share
the same cpu_model.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <1457112875-5209-2-git-send-email-mjrosato@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-10 10:37:15 +01:00
Peter Maydell
a648c13738 Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20160309-1' into staging
add linux evdev support, vnc and console fixes.

# gpg: Signature made Wed 09 Mar 2016 09:02:47 GMT using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-ui-20160309-1:
  ui/console: add escape sequence \e[5, 6n
  input-linux: add switch to enable auto-repeat events
  input-linux: add option to toggle grab on all devices
  input: linux evdev support
  vnc: send cursor when a new client is connecting

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-10 02:51:14 +00:00
Ren Kimura
58aa7d8e44 ui/console: add escape sequence \e[5, 6n
Add support of escape sequence "\e[5n" and "\e[6n" to console.
"\e[5n" reports status of console and it always succeed
in virtual console.
"\e[6n" reports now cursor position in console.

Signed-off-by: Ren Kimura <rkx1209dev@gmail.com>
Message-id: 1457466681-7714-2-git-send-email-rkx1209dev@gmail.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-09 09:35:56 +01:00
Peter Maydell
4ba364b472 Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging
Add Samuel Thibault as slirp maintainer

# gpg: Signature made Tue 08 Mar 2016 20:43:01 GMT using RSA key ID FB6B2F1D
# gpg: Good signature from "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: F632 74CD C630 0873 CB3D  29D9 E3E5 1CE8 FB6B 2F1D

* remotes/thibault/tags/samuel-thibault:
  MAINTAINERS: Add Samuel Thibault as slirp maintainer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-09 05:14:55 +00:00
Peter Maydell
8519c8e073 Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.6-6' into staging
migration:
* add avx2 instruction optimization, speeds up zero-page checking on
  compatible architectures and compilers (gcc 4.9+)
* add additional postcopy stats to 'info migrate' output

# gpg: Signature made Tue 08 Mar 2016 11:29:48 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"

* remotes/amit-migration/tags/migration-for-2.6-6:
  cutils: add avx2 instruction optimization
  configure: detect ifunc and avx2 attribute
  Postcopy: Fix sync count in info migrate

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-09 01:07:16 +00:00
Peter Maydell
3293680dc7 Merge remote-tracking branch 'remotes/kraxel/tags/pull-fw-cfg-20160308-1' into staging
acpi: add fw_cfg device node to dsdt

# gpg: Signature made Tue 08 Mar 2016 11:15:42 GMT using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-fw-cfg-20160308-1:
  tests: update acpi test data
  fw_cfg: document ACPI device node information
  acpi: arm: add fw_cfg device node to dsdt
  acpi: pc: add fw_cfg device node to dsdt
  pc: fw_cfg: move ioport base constant to pc.h
  fw_cfg: expose control register size in fw_cfg.h

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-09 00:44:43 +00:00
Peter Maydell
5763795f93 Merge remote-tracking branch 'remotes/amit-virtio-rng/tags/rng-for-2.6-2' into staging
rng: use simpleq instead of gslist

# gpg: Signature made Tue 08 Mar 2016 10:51:23 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"

* remotes/amit-virtio-rng/tags/rng-for-2.6-2:
  rng: switch request queue to QSIMPLEQ

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-09 00:21:17 +00:00
Samuel Thibault
eda509fa0a MAINTAINERS: Add Samuel Thibault as slirp maintainer
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Acked-by: Jan Kiszka <jan.kiszka@siemens.com>
2016-03-08 21:39:04 +01:00
Liang Li
28b90d9c19 cutils: add avx2 instruction optimization
buffer_find_nonzero_offset() is a hot function during live migration.
Now it use SSE2 instructions for optimization. For platform supports
AVX2 instructions, use AVX2 instructions for optimization can help
to improve the performance of buffer_find_nonzero_offset() about 30%
comparing to SSE2.

Live migration can be faster with this optimization, the test result
shows that for an 8GiB RAM idle guest just boots, this patch can help
to shorten the total live migration time about 6%.

This patch use the ifunc mechanism to select the proper function when
running, for platform supports AVX2, execute the AVX2 instructions,
else, execute the original instructions.

Signed-off-by: Liang Li <liang.z.li@intel.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1457416397-26671-3-git-send-email-liang.z.li@intel.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-08 16:53:26 +05:30
Liang Li
99f2dbd343 configure: detect ifunc and avx2 attribute
Detect if the compiler can support the ifun and avx2, if so, set
CONFIG_AVX2_OPT which will be used to turn on the avx2 instruction
optimization.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Liang Li <liang.z.li@intel.com>
Message-Id: <1457416397-26671-2-git-send-email-liang.z.li@intel.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-08 16:53:26 +05:30
Dr. David Alan Gilbert
614e8018ed Postcopy: Fix sync count in info migrate
I'd missed the sync count off in the postcopy case.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Message-id: 1456394631-18010-1-git-send-email-dgilbert@redhat.com
Message-Id: <1456394631-18010-1-git-send-email-dgilbert@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-08 16:52:27 +05:30
Gerd Hoffmann
a6ccabd676 input-linux: add switch to enable auto-repeat events
Enable with "-input-linux /dev/input/${device},repeat=on".

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1457087116-4379-4-git-send-email-kraxel@redhat.com
2016-03-08 12:20:11 +01:00
Gerd Hoffmann
46d921bebe input-linux: add option to toggle grab on all devices
Maintain a list of all input devices.  Add an option to make grab
work across all devices (so toggling grab on the keybard can switch
over the mouse too).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1457087116-4379-3-git-send-email-kraxel@redhat.com
2016-03-08 12:20:11 +01:00
Gerd Hoffmann
e0d2bd5195 input: linux evdev support
This patch adds support for reading input events directly from linux
evdev devices and forward them to the guest.  Unlike virtio-input-host
which simply passes on all events to the guest without looking at them
this will interpret the events and feed them into the qemu input
subsystem.

Therefore this is limited to what the qemu input subsystem and the
emulated input devices are able to handle.  Also there is no support for
absolute coordinates (tablet/touchscreen).  So we are talking here about
basic mouse and keyboard support.

The advantage is that it'll work without virtio-input drivers in the
guest, the events are delivered to the usual ps/2 or usb input devices
(depending on what the machine happens to have).  And for keyboards
qemu is able to switch the keyboard between guest and host on hotkey.
The hotkey is hard-coded for now (both control keys), initialy the
guest owns the keyboard.

Probably most useful when assigning vga devices with vfio and using a
physical monitor instead of vnc/spice/gtk as guest display.

Usage:  Add '-input-linux /dev/input/event<nr>' to the qemu command
line.  Note that udev has rules which populate /dev/input/by-{id,path}
with static names, which might be more convinient to use.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1457087116-4379-2-git-send-email-kraxel@redhat.com
2016-03-08 12:20:11 +01:00
Gerd Hoffmann
a60c785608 tests: update acpi test data
using tests/acpi-test-data/rebuild-expected-aml.sh

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-08 12:15:27 +01:00
Gabriel L. Somlo
36a43ea83b fw_cfg: document ACPI device node information
Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Marc Marí <markmb@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1455906029-25565-6-git-send-email-somlo@cmu.edu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-08 12:15:22 +01:00
Gabriel L. Somlo
70bee80d6b acpi: arm: add fw_cfg device node to dsdt
Add a fw_cfg device node to the ACPI DSDT. This is mostly
informational, as the authoritative fw_cfg MMIO region(s)
are listed in the Device Tree. However, since we are building
ACPI tables, we might as well be thorough while at it...

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Marc Marí <markmb@redhat.com>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1455906029-25565-5-git-send-email-somlo@cmu.edu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-08 12:15:15 +01:00
Gabriel L. Somlo
e2ec75685c acpi: pc: add fw_cfg device node to dsdt
Add a fw_cfg device node to the ACPI DSDT. While the guest-side
firmware can't utilize this information (since it has to access
the hard-coded fw_cfg device to extract ACPI tables to begin with),
having fw_cfg listed in ACPI will help the guest kernel keep a more
accurate inventory of in-use IO port regions.

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Marc Marí <markmb@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1455906029-25565-4-git-send-email-somlo@cmu.edu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-08 12:15:09 +01:00
Gabriel L. Somlo
305ae88895 pc: fw_cfg: move ioport base constant to pc.h
Move BIOS_CFG_IOPORT define from pc.c to pc.h, and rename
it to FW_CFG_IO_BASE.

Cc: Marc Marí <markmb@redhat.com>
Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Marc Marí <markmb@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1455906029-25565-3-git-send-email-somlo@cmu.edu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-08 12:14:49 +01:00
Peter Maydell
d1cc881d54 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Tue 08 Mar 2016 07:46:08 GMT using RSA key ID 398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  net: check packet payload length
  filter-buffer: Add status_changed callback processing
  filter: Add 'status' property for filter object
  rocker: allow user to specify rocker world by property
  rocker: add name field into WorldOps ale let world specify its name
  rocker: return -ENOMEM in case of some world alloc fails
  rocker: forbid to change world type
  net: netmap: probe netmap interface for virtio-net header
  net: simplify net_init_tap_one logic
  MAINTAINERS: Add entries for include/net/ files
  net: filter: correctly remove filter from the list during finalization
  net: ne2000: check ring buffer control registers

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-08 10:25:50 +00:00
Gabriel L. Somlo
ce9a2aa372 fw_cfg: expose control register size in fw_cfg.h
Expose the size of the control register (FW_CFG_CTL_SIZE) in fw_cfg.h.
Add comment to fw_cfg_io_realize() pointing out that since the
8-bit data register is always subsumed by the 16-bit control
register in the port I/O case, we use the control register width
as the *total* width of the (classic, non-DMA) port I/O region reserved
for the device.

Cc: Marc Marí <markmb@redhat.com>
Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Marc Marí <markmb@redhat.com>
Message-id: 1455906029-25565-2-git-send-email-somlo@cmu.edu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-08 10:46:30 +01:00
Frediano Ziglio
91ec41dc3f vnc: send cursor when a new client is connecting
If you have hardware cursor and you are reconnecting the VNC client
you need to send the cursor. Failing to do so make the cursor invisible
till is changed.

Signed-off-by: Frediano Ziglio <fziglio@redhat.com>
Message-id: 1456929142-14033-1-git-send-email-fziglio@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-08 10:45:01 +01:00
Prasad J Pandit
362786f14a net: check packet payload length
While computing IP checksum, 'net_checksum_calculate' reads
payload length from the packet. It could exceed the given 'data'
buffer size. Add a check to avoid it.

Reported-by: Liu Ling <liuling-it@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
zhanghailiang
f1b2bc601a filter-buffer: Add status_changed callback processing
While the status of filter-buffer changing from 'on' to 'off',
it need to release all the buffered packets, and delete the related
timer, while switch from 'off' to 'on', it need to resume the release
packets timer.

Here, we extract the process of setup timer into a new helper,
which will be used in the new status_changed callback.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
zhanghailiang
338d3f415e filter: Add 'status' property for filter object
With this property, users can control if this filter is 'on'
or 'off'. The default behavior for filter is 'on'.

For some types of filters, they may need to react to status changing,
So here, we introduced status changing callback/notifier for filter class.

We will skip the disabled ('off') filter when delivering packets in net layer.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
Jiri Pirko
9fe7101f1d rocker: allow user to specify rocker world by property
Add property to specify rocker world. All ports will be assigned to this
world.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
Jiri Pirko
031143c8d5 rocker: add name field into WorldOps ale let world specify its name
Also use this in world_name getter function.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
Jiri Pirko
39e0c4f47d rocker: return -ENOMEM in case of some world alloc fails
Until now, 0 is returned in this error case. Fix it ro return -ENOMEM.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
Jiri Pirko
0ab9cd9a4b rocker: forbid to change world type
Port to world assignment should be permitted only by qemu user. Driver
should not be able to do it, so forbid that possibility.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
Vincenzo Maffione
9fbad2ca36 net: netmap: probe netmap interface for virtio-net header
Previous implementation of has_ufo, has_vnet_hdr, has_vnet_hdr_len, etc.
did not really probe for virtio-net header support for the netmap
interface attached to the backend. These callbacks were correct for
VALE ports, but incorrect for hardware NICs, pipes, monitors, etc.

This patch fixes the implementation to work properly with all kinds
of netmap ports.

Signed-off-by: Vincenzo Maffione <v.maffione@gmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:18 +08:00
Paolo Bonzini
3a2d44f6dd net: simplify net_init_tap_one logic
net_init_tap_one receives in vhostfdname a fd name from vhostfd= or
vhostfds=, or NULL if there is no vhostfd=/vhostfds=.  It is simpler
to just check vhostfdname, than it is to check for vhostfd= or
vhostfds=.  This also calms down Coverity, which otherwise thinks
that monitor_fd_param could dereference a NULL vhostfdname.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:09 +08:00
Thomas Huth
d24b2b1ccc MAINTAINERS: Add entries for include/net/ files
The include/net/ files correspond to the files in the net/ directory,
thus there should be corresponding entries in the MAINTAINERS file.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:09 +08:00
Jason Wang
5dd2d45e34 net: filter: correctly remove filter from the list during finalization
Qemu may crash when we want to add two filters on the same netdev but
the initialization of second fails (e.g missing parameters):

./qemu-system-x86_64 -netdev user,id=un0 \
 -object filter-buffer,id=f0,netdev=un0,interval=10 \
 -object filter-buffer,id=f1,netdev=un0
Segmentation fault (core dumped)

This is because we don't check whether or not the filter was in the
list of netdev. This patch fixes this.

Cc: Yang Hongyang <hongyang.yang@easystack.cn>
Reviewed-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:09 +08:00
Prasad J Pandit
415ab35a44 net: ne2000: check ring buffer control registers
Ne2000 NIC uses ring buffer of NE2000_MEM_SIZE(49152)
bytes to process network packets. Registers PSTART & PSTOP
define ring buffer size & location. Setting these registers
to invalid values could lead to infinite loop or OOB r/w
access issues. Add check to avoid it.

Reported-by: Yang Hongke <yanghongke@huawei.com>
Tested-by: Yang Hongke <yanghongke@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-03-08 15:34:09 +08:00
Ladi Prosek
443590c204 rng: switch request queue to QSIMPLEQ
QSIMPLEQ supports appending to tail in O(1) and is intrusive so
it doesn't require extra memory allocations for the bookkeeping
data.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1457010971-24771-1-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-08 12:54:14 +05:30
Peter Maydell
97556fe80e Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* RAMBlock vs. MemoryRegion cleanups from Fam
* mru_section optimization from Fam
* memory.txt improvements from Peter and Xiaoqiang
* i8257 fix from Hervé
* -daemonize fix
* Cleanups and small fixes from Alex, Praneith, Wei

# gpg: Signature made Mon 07 Mar 2016 17:08:59 GMT using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"

* remotes/bonzini/tags/for-upstream:
  scsi-bus: Remove tape command from scsi_req_xfer
  kvm/irqchip: use bitmap utility for gsi tracking
  MAINTAINERS: Add entry for include/sysemu/kvm*.h
  doc/memory.txt: correct description of MemoryRegionOps fields
  doc/memory.txt: correct a logic error
  icount: possible options for sleep are on or off
  exec: Introduce AddressSpaceDispatch.mru_section
  exec: Factor out section_covers_addr
  exec: Pass RAMBlock pointer to qemu_ram_free
  memory: Drop MemoryRegion.ram_addr
  memory: Implement memory_region_get_ram_addr with mr->ram_block
  memory: Move assignment to ram_block to memory_region_init_*
  exec: Return RAMBlock pointer from allocating functions
  i8257: fix Terminal Count status
  log: do not log if QEMU is daemonized but without -D

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-08 04:53:37 +00:00
Alex Pyrgiotis
4792b7e9d5 scsi-bus: Remove tape command from scsi_req_xfer
Remove the RECOVER_BUFFERED_DATA command from the list of commands that
are handled by scsi_req_xfer(). Given that this command is
tape-specific, it should be handled only by scsi_stream_req_xfer().

Signed-off-by: Alex Pyrgiotis <apyrgio@arrikto.com>

Message-Id: <1457365822-22435-1-git-send-email-apyrgio@arrikto.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 17:56:23 +01:00
Wei Yang
8269fb7082 kvm/irqchip: use bitmap utility for gsi tracking
By using utilities in bitops and bitmap, this patch tries to make it more
friendly to audience. No functional change.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Message-Id: <1457229445-25954-1-git-send-email-richard.weiyang@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 15:18:22 +01:00
Thomas Huth
a95e9a485b MAINTAINERS: Add entry for include/sysemu/kvm*.h
The include/sysemu/kvm*.h header files should be part of
the overall KVM section.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1456403605-26587-1-git-send-email-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:38 +01:00
Peter Maydell
ef00bdaf8c doc/memory.txt: correct description of MemoryRegionOps fields
Probably what happened was that when the API was being designed it
started off with an 'aligned' field, and then later the field name
and semantics were changed but the docs weren't updated to match.

Similarly, cpu_register_io_memory() does not exist anymore, so
clarify the documentation for .old_mmio.

Reported-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:38 +01:00
xiaoqiang zhao
8210f5f6f5 doc/memory.txt: correct a logic error
In the regions overlap example, region B has a higher priority thus
should has a larger priority number than C.

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Message-Id: <1456476051-15121-1-git-send-email-zxq_yx_007@163.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:38 +01:00
Pranith Kumar
778d9f9b25 icount: possible options for sleep are on or off
icount sleep takes on or off as options. A few places mention sleep=no
which is not accepted. This patch corrects them.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <1456499811-16819-1-git-send-email-bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:38 +01:00
Fam Zheng
729633c2bc exec: Introduce AddressSpaceDispatch.mru_section
Under heavy workloads the lookup will likely end up with the same
MemoryRegionSection from last time. Using a pointer to cache the result,
like ram_list.mru_block, significantly reduces cost of
address_space_translate.

During address space topology update, as->dispatch will be reallocated
so the pointer is invalidated automatically.

Perf reports a visible drop on the cpu usage, because phys_page_find is
not called.  Before:

   2.35%  qemu-system-x86_64       [.] phys_page_find
   0.97%  qemu-system-x86_64       [.] address_space_translate_internal
   0.95%  qemu-system-x86_64       [.] address_space_translate
   0.55%  qemu-system-x86_64       [.] address_space_lookup_region

After:

   0.97%  qemu-system-x86_64       [.] address_space_translate_internal
   0.97%  qemu-system-x86_64       [.] address_space_lookup_region
   0.84%  qemu-system-x86_64       [.] address_space_translate

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1456813104-25902-8-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:37 +01:00
Fam Zheng
29cb533d8c exec: Factor out section_covers_addr
This will be shared by the next patch.

Also add a comment explaining the unobvious condition on "size.hi".

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1456813104-25902-7-git-send-email-famz@redhat.com>
[Small change to the comment. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:37 +01:00
Fam Zheng
f1060c55bf exec: Pass RAMBlock pointer to qemu_ram_free
The only caller now knows exactly which RAMBlock to free, so it's not
necessary to do the lookup.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1456813104-25902-6-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:37 +01:00
Fam Zheng
8e41fb63c5 memory: Drop MemoryRegion.ram_addr
All references to mr->ram_addr are replaced by
memory_region_get_ram_addr(mr) (except for a few assertions that are
replaced with mr->ram_block).

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1456813104-25902-5-git-send-email-famz@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:26:29 +01:00
Fam Zheng
7ebb2745ac memory: Implement memory_region_get_ram_addr with mr->ram_block
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1456813104-25902-4-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:18:28 +01:00
Fam Zheng
0a75601853 memory: Move assignment to ram_block to memory_region_init_*
We don't force "const" qualifiers with pointers in QEMU, but it's still
good to keep a clean function interface. Assigning to mr->ram_block is
in this sense ugly - one initializer mutating its owning object's state.

Move it to memory_region_init_*, where mr->ram_addr is assigned.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1456813104-25902-3-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:18:28 +01:00
Fam Zheng
528f46af6e exec: Return RAMBlock pointer from allocating functions
Previously we return RAMBlock.offset; now return the pointer to the
whole structure.

ram_block_add returns void now, error is completely passed with errp.

Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1456813104-25902-2-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:18:28 +01:00
Hervé Poussineau
bb8f32c031 i8257: fix Terminal Count status
When a DMA transfer is done (ie all bytes have been transfered), the corresponding
Terminal Count bit must be set in the status register.
This bit is already cleared in i8257_read_cont and i8257_write_cont when required.

This fixes (at least) floppy transfer in IBM 40p firmware, which checks in DMA
controller if everything went fine.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Message-Id: <1456404332-31556-1-git-send-email-hpoussin@reactos.org>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:18:28 +01:00
Paolo Bonzini
c586eac336 log: do not log if QEMU is daemonized but without -D
Commit 96c33a4 ("log: Redirect stderr to logfile if deamonized",
2016-02-22) wanted to move stderr of a daemonized QEMU to the file
specified with -D.

However, if -D was not passed, the patch had the side effect of not
redirecting stderr to /dev/null.  This happened because qemu_logfile
was set to stderr rather than the expected value of NULL.  The fix
is simply in the "if" condition of do_qemu_set_log; the "if" for
closing the file is also changed to match.

Reported-by: Jan Tomko <jtomko@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 13:18:28 +01:00
Peter Maydell
1464ad45cd Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2016-03-04' into staging
QAPI patches for 2016-03-04

# gpg: Signature made Sat 05 Mar 2016 09:47:19 GMT using RSA key ID EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"

* remotes/armbru/tags/pull-qapi-2016-03-04:
  qapi: Drop useless 'data' member of unions
  chardev: Drop useless ChardevDummy type
  qapi: Avoid use of 'data' member of QAPI unions
  ui: Shorten references into InputEvent
  util: Shorten references into SocketAddress
  chardev: Shorten references into ChardevBackend
  qapi: Update docs to match recent generator changes
  qapi-visit: Expose visit_type_FOO_members()
  qapi: Rename 'fields' to 'members' in generated C code
  qapi: Rename 'fields' to 'members' in generator
  qapi-dealloc: Reduce use outside of generated code
  qmp-shell: fix pretty printing of JSON responses

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-06 11:53:27 +00:00
Eric Blake
48eb62a74f qapi: Drop useless 'data' member of unions
We started moving away from the use of the 'void *data' member
in the C union corresponding to a QAPI union back in commit
544a373; recent commits have gotten rid of other uses.  Now
that it is completely unused, we can remove the member itself
as well as the FIXME comment.  Update the testsuite to drop the
negative test union-clash-data.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1457021813-10704-11-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:42:06 +01:00
Eric Blake
b1918fbb1c chardev: Drop useless ChardevDummy type
Commit d0d7708b made ChardevDummy be an empty wrapper type around
ChardevCommon.  But there is no technical reason for this indirection,
so simplify the code by directly using the base type.

Also change the fallback assignment to assign u.null rather than
u.data, since a future patch will remove the data member of the C
struct generated for QAPI unions.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1457106160-23614-1-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:42:03 +01:00
Eric Blake
10f759079e qapi: Avoid use of 'data' member of QAPI unions
QAPI code generators currently create a 'void *data' member as
part of the anonymous union embedded in the C struct corresponding
to a QAPI union.  However, directly assigning to this member of
the union feels a bit fishy, when we can assign to another member
of the struct instead.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1457021813-10704-9-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:41:58 +01:00
Eric Blake
b5a1b44318 ui: Shorten references into InputEvent
An upcoming patch will alter how simple unions, like InputEvent, are
laid out, which will impact all lines of the form 'evt->u.XXX'
(expanding it to the longer 'evt->u.XXX.data').  For better
legibility in that patch, and less need for line wrapping, it's better
to use a temporary variable to reduce the effect of a layout change to
just the variable initializations, rather than every reference within
an InputEvent.

There was one instance in hid.c:hid_pointer_event() where the code
was referring to evt->u.rel inside the case label where evt->u.abs
is the correct name; thankfully, both members of the union have the
same type, so it happened to work, but it is now cleaner.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1457021813-10704-8-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:41:55 +01:00
Eric Blake
0399293e5b util: Shorten references into SocketAddress
An upcoming patch will alter how simple unions, like SocketAddress,
are laid out, which will impact all lines of the form 'addr->u.XXX'
(expanding it to the longer 'addr->u.XXX.data').  For better
legibility in that patch, and less need for line wrapping, it's better
to use a temporary variable to reduce the effect of a layout change to
just the variable initializations, rather than every reference within
a SocketAddress.  Also, take advantage of some C99 initialization where
it makes sense (simplifying g_new0() to g_new()).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1457021813-10704-7-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:41:52 +01:00
Eric Blake
f194a1ae53 chardev: Shorten references into ChardevBackend
An upcoming patch will alter how simple unions, like ChardevBackend,
are laid out, which will impact all lines of the form 'backend->u.XXX'
(expanding it to the longer 'backend->u.XXX.data').  For better
legibility in that patch, and less need for line wrapping, it's better
to use a temporary variable to reduce the effect of a layout change to
just the variable initializations, rather than every reference within
a ChardevBackend.  It doesn't hurt that this also makes the code more
consistent: some clients touched here already had a temporary variable
but weren't using it.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-By: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1457021813-10704-6-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:41:47 +01:00
Eric Blake
9ee86b8526 qapi: Update docs to match recent generator changes
Several commits have been changing the generator, but not updating
the docs to match:
- The implicit tag member is named "type", not "kind".  Screwed up in
commit 39a1815.
- Commit 9f08c8ec made list types lazy, and thereby dropped
UserDefOneList if nothing explicitly uses the list type.
- Commit 51e72bc1 switched the parameter order with 'name' occurring
earlier.
- Commit e65d89bf changed the layout of UserDefOneList.
- Prefer the term 'member' over 'field'.
- We now expose visit_type_FOO_members() for objects.
- etc.

Rework the examples to show slightly more output (we don't want to
show too much; that's what the testsuite is for), and regenerate the
output to match all recent changes.  Also, rearrange output to show
.h files before .c (understanding the interface first often makes
the implementation easier to follow).

Reported-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1457021813-10704-5-git-send-email-eblake@redhat.com>
2016-03-05 10:41:16 +01:00
Eric Blake
4d91e9115c qapi-visit: Expose visit_type_FOO_members()
Dan Berrange reported a case where he needs to work with a
QCryptoBlockOptions union type using the OptsVisitor, but only
visit one of the branches of that type (the discriminator is not
visited directly, but learned externally).  When things were
boxed, it was easy: just visit the variant directly, which took
care of both allocating the variant and visiting its members, then
store that pointer in the union type.  But now that things are
unboxed, we need a way to visit the members without allocation,
done by exposing visit_type_FOO_members() to the user.

Before the patch, we had quite a bit of code associated with
object_members_seen to make sure that a declaration of the helper
was in scope before any use of the function.  But now that the
helper is public and declared in the header, the .c file no
longer needs to worry about topological sorting (the helper is
always in scope), which leads to some nice cleanups.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1457021813-10704-4-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:41:13 +01:00
Eric Blake
c81200b014 qapi: Rename 'fields' to 'members' in generated C code
C types and JSON objects don't have fields, but members.  We
shouldn't gratuitously invent terminology.  This patch is a
strict renaming of static genarated functions, plus the naming
of the dummy filler member for empty structs, before the next
patch exposes some of that naming to the rest of the code base.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1457021813-10704-3-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:41:09 +01:00
Eric Blake
14f00c6c49 qapi: Rename 'fields' to 'members' in generator
C types and JSON objects don't have fields, but members.  We
shouldn't gratuitously invent terminology.  This patch is a
strict renaming of generator code internals (including testsuite
comments), before later patches rename C interfaces.

No change to generated code with this patch.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1457021813-10704-2-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-05 10:40:52 +01:00
Eric Blake
96a1616c85 qapi-dealloc: Reduce use outside of generated code
No need to roll our own use of the dealloc visitors when we can
just directly use the qapi_free_FOO() functions that do what we
want in one line.

In net.c, inline net_visit() into its remaining lone caller.

After this patch, test-visitor-serialization.c is the only
non-generated file that needs to use a dealloc visitor, because
it is testing low level aspects of the visitor interface.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1456262075-3311-2-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-04 17:16:32 +01:00
Daniel P. Berrange
e55250c6cb qmp-shell: fix pretty printing of JSON responses
Pretty printing of JSON responses is important to be able to understand
large responses from query commands in particular. Unfortunately this
was broken during the addition of the verbose flag in

  commit 1ceca07e48
  Author: John Snow <jsnow@redhat.com>
  Date:   Wed Apr 29 15:14:04 2015 -0400

    scripts: qmp-shell: Add verbose flag

This is because that change turned the python data structure into a
formatted JSON string before the pretty print was given it. So we're
just pretty printing a string, which is a no-op.

The original pretty printer would output python objects.

(QEMU) query-chardev
{   u'return': [   {   u'filename': u'vc',
                       u'frontend-open': False,
                       u'label': u'parallel0'},
                   {   u'filename': u'vc',
                       u'frontend-open': True,
                       u'label': u'serial0'},
                   {   u'filename': u'unix:/tmp/qemp,server',
                       u'frontend-open': True,
                       u'label': u'compat_monitor0'}]}

This fixes the problem by switching to outputting pretty formatted JSON
text instead. This has the added benefit that the pretty printed output
is now valid JSON text. Due to the way the verbose flag was handled, the
pretty printing now applies to the command sent, as well as its response:

(QEMU) query-chardev
{
    "execute": "query-chardev",
    "arguments": {}
}
{
    "return": [
        {
            "frontend-open": false,
            "label": "parallel0",
            "filename": "vc"
        },
        {
            "frontend-open": true,
            "label": "serial0",
            "filename": "vc"
        },
        {
            "frontend-open": true,
            "label": "compat_monitor0",
            "filename": "unix:/tmp/qmp,server"
        }
    ]
}

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1456224706-1591-1-git-send-email-berrange@redhat.com>
Tested-by: Kashyap Chamarthy <kchamart@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
[Bonus fix: multiple -p now work]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-04 17:16:32 +01:00
Peter Maydell
3c0f12df65 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160304' into staging
target-arm queue:
 * Correct handling of writes to CPSR from gdbstub in user mode
 * virt: lift maximum RAM limit to 255GB
 * sdhci: implement reset
 * virt: if booting in Secure mode, provide secure-only RAM, make first
   flash device secure-only, and assume the EL3 boot rom will handle PSCI
 * bcm2835: use explicit endianness accessors rather than ldl/stl_phys
 * support big-endian in system mode for ARM
 * implement SETEND instruction
 * arm_gic: implement the GICv2 GICC_DIR register
 * fix SRS bug: only trap from S-EL1 to EL3 if specified mode is Mon

# gpg: Signature made Fri 04 Mar 2016 11:38:53 GMT using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"

* remotes/pmaydell/tags/pull-target-arm-20160304: (30 commits)
  target-arm: Only trap SRS from S-EL1 if specified mode is MON
  hw/intc/arm_gic.c: Implement GICv2 GICC_DIR
  arm: boot: Support big-endian elfs
  loader: Add data swap option to load-elf
  loader: load_elf(): Add doc comment
  loader: add API to load elf header
  target-arm: implement BE32 mode in system emulation
  target-arm: implement setend
  target-arm: introduce tbflag for endianness
  target-arm: a64: Add endianness support
  target-arm: introduce disas flag for endianness
  target-arm: pass DisasContext to gen_aa32_ld*/st*
  target-arm: implement SCTLR.EE
  linux-user: arm: handle CPSR.E correctly in strex emulation
  linux-user: arm: set CPSR.E/SCTLR.E0E correctly for BE mode
  arm: cpu: handle BE32 user-mode as BE
  target-arm: cpu: Move cpu_is_big_endian to header
  target-arm: implement SCTLR.B, drop bswap_code
  linux-user: arm: pass env to get_user_code_*
  linux-user: arm: fix coding style for some linux-user signal functions
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:46:32 +00:00
Ralf-Philipp Weinmann
ba63cf47a9 target-arm: Only trap SRS from S-EL1 if specified mode is MON
Commit cbc0326b6f caused SRS instructions executed from Secure
EL1 to trap to EL3 even if the specified mode was not monitor mode.

According to the ARMv8 Architecture reference manual [F6.1.203], ALL
of the following conditions need to be met for SRS to trap to EL3:
* It is executed at Secure PL1.
* The specified mode is monitor mode.
* EL3 is using AArch64.

Correct the condition governing the trap to EL3 to check the
specified mode.

Signed-off-by: Ralf-Philipp Weinmann <ralf+devel@comsecuris.com>
Message-id: 20160222224251.GA11654@beta.comsecuris.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: tweaked comment text to read 'specified mode'; edited
 commit message]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:22 +00:00
Peter Maydell
a55c910e0b hw/intc/arm_gic.c: Implement GICv2 GICC_DIR
The GICv2 introduces a new CPU interface register GICC_DIR, which
allows an OS to split the "priority drop" and "deactivate interrupt"
parts of interrupt completion. Implement this register.
(Note that the register is at offset 0x1000 in the CPU interface,
which means it is on a different 4K page from all the other registers.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Sergey Fedorov <serge.fdrv@gmail.com>
Message-id: 1456854176-7813-1-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:22 +00:00
Peter Crosthwaite
9776f63645 arm: boot: Support big-endian elfs
Support ARM big-endian ELF files in system-mode emulation. When loading
an elf, determine the endianness mode expected by the elf, and set the
relevant CPU state accordingly.

With this, big-endian modes are now fully supported via system-mode LE,
so there is no need to restrict the elf loading to the TARGET
endianness so the ifdeffery on TARGET_WORDS_BIGENDIAN goes away.

Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: fix typo in comments]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:21 +00:00
Peter Crosthwaite
7ef295ea5b loader: Add data swap option to load-elf
Some CPUs are of an opposite data-endianness to other components in the
system. Sometimes elfs have the data sections layed out with this CPU
data-endianness accounting for when loaded via the CPU, so byte swaps
(relative to other system components) will occur.

The leading example, is ARM's BE32 mode, which is is basically LE with
address manipulation on half-word and byte accesses to access the
hw/byte reversed address. This means that word data is invariant
across LE and BE32. This also means that instructions are still LE.
The expectation is that the elf will be loaded via the CPU in this
endianness scheme, which means the data in the elf is reversed at
compile time.

As QEMU loads via the system memory directly, rather than the CPU, we
need a mechanism to reverse elf data endianness to implement this
possibility.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:21 +00:00
Peter Crosthwaite
140b7ce5ff loader: load_elf(): Add doc comment
Document the usage of load_elf() for clarity on current features.

Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:21 +00:00
Peter Crosthwaite
04ae712a9f loader: add API to load elf header
Add an API to load an elf header header from a file. Populates a
buffer with the header contents, as well as a boolean for whether the
elf is 64b or not. Both arguments are optional.

Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: Fix typo in comment]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:21 +00:00
Paolo Bonzini
e334bd3190 target-arm: implement BE32 mode in system emulation
System emulation only has a little-endian target; BE32 mode
is implemented by adjusting the low bits of the address
for every byte and halfword load and store.  64-bit accesses
flip the low and high words.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[PC changes:
  * rebased against master (Jan 2016)
]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:21 +00:00
Paolo Bonzini
9886ecdf31 target-arm: implement setend
Since this is not a high-performance path, just use a helper to
flip the E bit and force a lookup in the hash table since the
flags have changed.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:21 +00:00
Peter Crosthwaite
91cca2cda9 target-arm: introduce tbflag for endianness
Introduce a tbflags for endianness, set based upon the CPUs current
endianness. This in turn propagates through to the disas endianness
flag.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:20 +00:00
Peter Crosthwaite
aa6489da4e target-arm: a64: Add endianness support
Set the dc->mo_endianness flag for AA64 and use it in all ldst ops.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:20 +00:00
Paolo Bonzini
dacf0a2ff7 target-arm: introduce disas flag for endianness
Introduce a disas flag for setting the CPU data endianness. This allows
control of the endianness from the CPU state rather than hard-coding it
to TARGET_WORDS_BIGENDIAN.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[ PC changes:
  * Split off as new patch from original:
        "target-arm: introduce tbflag for CPSR.E"
  * Wrote commit message from scratch
]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:20 +00:00
Paolo Bonzini
12dcc3217d target-arm: pass DisasContext to gen_aa32_ld*/st*
We'll need the DisasContext in the next patch to retrieve the
desired endianness, so pass it as a whole to gen_aa32_ld*/st*.

Unfortunately we cannot let those functions call get_mem_index,
because of user-mode load/store instructions.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[ PC changes:
 * Fix long lines
]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:20 +00:00
Peter Crosthwaite
73462dddf6 target-arm: implement SCTLR.EE
Implement SCTLR.EE bit which controls data endianess for exceptions
and page table translations. SCTLR.EE is mirrored to the CPSR.E bit
on exception entry.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:20 +00:00
Paolo Bonzini
c3ae85fc8f linux-user: arm: handle CPSR.E correctly in strex emulation
Now that CPSR.E is set correctly, prepare for when setend will be able
to change it; bswap data in and out of strex manually by comparing
SCTLR.B, CPSR.E and TARGET_WORDS_BIGENDIAN (we do not have the luxury
of using TCGMemOps).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[ PC changes:
  * Moved SCTLR/CPSR logic to arm_cpu_data_is_big_endian
]
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:19 +00:00
Peter Crosthwaite
9c5a746038 linux-user: arm: set CPSR.E/SCTLR.E0E correctly for BE mode
If doing big-endian linux-user mode, set both the CPSR.E and SCTLR.E0E
bits. This sets big-endian mode for data accesses.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:19 +00:00
Peter Crosthwaite
b2e62d9a7b arm: cpu: handle BE32 user-mode as BE
endian with address manipulations on subword accesses (to give the
illusion of BE). But user-mode cannot tell the difference and is
already implemented as straight BE. So handle the difference in the
endianess query, where USER mode is BE and system is not.

Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:19 +00:00
Peter Crosthwaite
ed50ff7875 target-arm: cpu: Move cpu_is_big_endian to header
There is a CPU data endianness test that is used to drive the
virtio_big_endian test.

Move this up to the header so it can be more generally used for endian
tests. The KVM specific cpu_syncronize_state call is left behind in the
virtio specific function.

Rename it arm_cpu-data_is_big_endian() to more accurately capture that
this is for data accesses only.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:19 +00:00
Paolo Bonzini
f9fd40ebe4 target-arm: implement SCTLR.B, drop bswap_code
bswap_code is a CPU property of sorts ("is the iside endianness the
opposite way round to TARGET_WORDS_BIGENDIAN?") but it is not the
actual CPU state involved here which is SCTLR.B (set for BE32
binaries, clear for BE8).

Replace bswap_code with SCTLR.B, and pass that to arm_ld*_code.
The next patches will make data fetches honor both SCTLR.B and
CPSR.E appropriately.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[PC changes:
 * rebased on master (Jan 2016)
 * s/TARGET_USER_ONLY/CONFIG_USER_ONLY
 * Use bswap_code() for disas_set_info() instead of raw sctlr_b
]
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:19 +00:00
Paolo Bonzini
49017bd8b4 linux-user: arm: pass env to get_user_code_*
This matches the idiom used by get_user_data_* later in the series,
and will help when bswap_code will be replaced by SCTLR.B.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:18 +00:00
Paolo Bonzini
a0e1e6d705 linux-user: arm: fix coding style for some linux-user signal functions
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:18 +00:00
Andrew Baumann
eab713941a bcm2835_mbox/property: replace ldl_phys/stl_phys with endian-specific accesses
PMM pointed out that ldl_phys and stl_phys are dependent on the CPU's
endianness, whereas device model code should be independent of
it. This changes the relevant Raspberry Pi devices to explicitly call
the little-endian variants.

Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Message-id: 1456880233-22568-1-git-send-email-Andrew.Baumann@microsoft.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-04 11:30:18 +00:00
Peter Maydell
4824a61a6d hw/arm/virt: Assume EL3 boot rom will handle PSCI if one is provided
If the user passes us an EL3 boot rom, then it is going to want to
implement the PSCI interface itself. In this case, disable QEMU's
internal PSCI implementation so it does not get in the way, and
instead start all CPUs in an SMP configuration at once (the boot
rom will catch them all and pen up the secondaries until needed).
The boot rom code is also responsible for editing the device tree
to include any necessary information about its own PSCI implementation
before eventually passing it to a NonSecure guest.

(This "start all CPUs at once" approach is what both ARM Trusted
Firmware and UEFI expect, since it is what the ARM Foundation Model
does; the other approach would be to provide some emulated hardware
for "start the secondaries" but this is simplest.)

This is a compatibility break, but I don't believe that anybody
was using a secure boot ROM with an SMP configuration. Such a setup
would be somewhat broken since there was nothing preventing nonsecure
guest code from calling the QEMU PSCI function to start up a secondary
core in a way that completely bypassed the secure world.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1456853976-7592-1-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:18 +00:00
Peter Maydell
738a5d9fbb hw/arm/virt: Make first flash device Secure-only if booting secure
If the virt board is started with the 'secure' property set to
request a Secure setup, then make the first flash device be
visible only to the Secure world.

This is a breaking change, but I don't expect it to be noticed
by anybody, because running TZ-aware guests isn't common and
those guests are generally going to be booting from the flash
and implicitly expecting their Non-secure guests to not touch it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1455288361-30117-5-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:18 +00:00
Peter Maydell
16f4a8dc5c hw/arm/virt: Load bios image to MemoryRegion, not physaddr
If we're loading a BIOS image into the first flash device,
load it into the flash's memory region specifically, not
into the physical address where the flash resides. This will
make a difference when the flash might be in the Secure
address space rather than the Nonsecure one.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1455288361-30117-4-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:17 +00:00
Peter Maydell
76151cacfe loader: Add load_image_mr() to load ROM image to a MemoryRegion
Add a new function load_image_mr(), which behaves like
load_image_targphys() except that it loads the ROM image to
a specified MemoryRegion rather than to a specified physical
address. This is useful when a ROM blob needs to be loaded
to a particular flash or ROM device but the address of that
device in the machine's address space is not known. (For
instance, ROMs in devices, or ROMs which might exist in
a different address space to the system address space.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1455288361-30117-3-git-send-email-peter.maydell@linaro.org
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2016-03-04 11:30:17 +00:00
Peter Maydell
83ec1923cd hw/arm/virt: Provide a secure-only RAM if booting in Secure mode
If we're booting in Secure mode, provide a secure-only RAM
(just 16MB) so that secure firmware has somewhere to run
from that won't be accessible to the Non-secure guest.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1455288361-30117-2-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:17 +00:00
Peter Maydell
8b41c30525 sdhci: Implement DeviceClass reset
The sdhci device was missing a DeviceClass reset method;
implement it. Poweron reset looks the same as reset commanded
by the guest via the device registers, apart from modelling of
the rpi 'pending insert interrupt on powerup' quirk.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Message-id: 1456493044-10025-3-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:17 +00:00
Peter Maydell
0719e71e52 sd.c: Handle NULL block backend in sd_get_inserted()
The sd.c SD card emulation code can be in a state where the
SDState BlockBackend pointer is NULL; this is treated as
"card not present". Add a missing check to sd_get_inserted()
so that we don't segfault in this situation.

(This could be provoked by the guest writing to the SDHCI
register to do a reset on a xilinx-zynq-a9 board; it will
also happen at startup when sdhci implements its DeviceClass
reset method.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 1456493044-10025-2-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:17 +00:00
Peter Maydell
71c2768433 virt: Lift the maximum RAM limit from 30GB to 255GB
The virt board restricts guests to only 30GB of RAM. This is a
hangover from the vexpress-a15 board, and there's no inherent reason
for it. 30GB is smaller than you might reasonably want to provision
a VM for on a beefy server machine. Raise the limit to 255GB.

We choose 255GB because the available space we currently have
below the 1TB boundary is up to the 512GB mark, but we don't
want to paint ourselves into a corner by assigning it all to
RAM. So we make half of it available for RAM, with the 256GB..512GB
range available for future non-RAM expansion purposes.

If we need to provide more RAM to VMs in the future then we need to:
 * allocate a second bank of RAM starting at 2TB and working up
 * fix the DT and ACPI table generation code in QEMU to correctly
   report two split lumps of RAM to the guest
 * fix KVM in the host kernel to allow guests with >40 bit address spaces

The last of these is obviously the trickiest, but it seems
reasonable to assume that anybody configuring a VM with a quarter
of a terabyte of RAM will be doing it on a host with more than a
terabyte of physical address space.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Tested-by: Wei Huang <wei@redhat.com>
Message-id: 1456402182-11651-1-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:16 +00:00
Peter Maydell
8c4f0eb94c target-arm: Correct handling of writes to CPSR mode bits from gdb in usermode
In helper.c the expression
  (env->uncached_cpsr & CPSR_M) != CPSR_USER
is always true; the right hand side was supposed to be ARM_CPU_MODE_USR
(an error in commit cb01d391).

Since the incorrect expression was always true, this just meant that
commit cb01d391 had no effect.

However simply changing the RHS here would reveal a logic error: if
the mode is USR we wish to completely ignore the attempt to set the
mode bits, which means that we must clear the CPSR_M bits from mask
to avoid the uncached_cpsr bits being updated at the end of the
function.

Move the condition into the correct place in the code, fix its RHS
constant, and add a comment about the fact that we must be doing a
gdbstub write if we're in user mode.

Fixes: https://bugs.launchpad.net/qemu/+bug/1550503
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Sergey Fedorov <serge.fdrv@gmail.com>
Message-id: 1456764438-30015-1-git-send-email-peter.maydell@linaro.org
2016-03-04 11:30:16 +00:00
Peter Maydell
2d3b7c0164 Merge remote-tracking branch 'remotes/amit-virtio-rng/tags/rng-for-2.6-1' into staging
rng:
- implement a request queue for rng-random so multiple guest requests
  don't result in vq buffers getting forgotten
- remove unused request cancellation code
- a VM with multiple vq buffers, when migrated, could get in a situation
  where not all buffers are handed back to the guest.  This is now
  fixed.

# gpg: Signature made Thu 03 Mar 2016 12:18:54 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"

* remotes/amit-virtio-rng/tags/rng-for-2.6-1:
  virtio-rng: ask for more data if queue is not fully drained
  rng: add request queue support to rng-random
  rng: move request queue cleanup from RngEgd to RngBackend
  rng: move request queue from RngEgd to RngBackend
  rng: remove the unused request cancellation code
  MAINTAINERS: Add an entry for the include/sysemu/rng*.h files

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-03 13:13:36 +00:00
Ladi Prosek
f8693c2cd0 virtio-rng: ask for more data if queue is not fully drained
This commit effectively reverts:

  commit 4621c1768e
  Author: Amit Shah <amit.shah@redhat.com>
  Date:   Wed Nov 21 11:21:19 2012 +0530

  virtio-rng: remove extra request for entropy

but instead of calling virtio_rng_process unconditionally, it
first checks to see if the queue is empty as a little bit of
optimization.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456998514-19271-1-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-03 17:42:26 +05:30
Ladi Prosek
60253ed1e6 rng: add request queue support to rng-random
Requests are now created in the RngBackend parent class and the
code path is shared by both rng-egd and rng-random.

This commit fixes the rng-random implementation which processed
only one request at a time and simply discarded all but the most
recent one. In the guest this manifested as delayed completion
of reads from virtio-rng, i.e. a read was completed only after
another read was issued.

By switching rng-random to use the same request queue as rng-egd,
the unsafe stack-based allocation of the entropy buffer is
eliminated and replaced with g_malloc.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-5-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-03 17:42:26 +05:30
Ladi Prosek
9f14b0add1 rng: move request queue cleanup from RngEgd to RngBackend
RngBackend is now in charge of cleaning up the linked list on
instance finalization. It also exposes a function to finalize
individual RngRequest instances, called by its child classes.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-4-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-03 17:42:26 +05:30
Ladi Prosek
74074e8a7c rng: move request queue from RngEgd to RngBackend
The 'requests' field now lives in the RngBackend parent class.
There are no functional changes in this commit.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-3-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-03 17:42:26 +05:30
Ladi Prosek
3c52ddcdc5 rng: remove the unused request cancellation code
rng_backend_cancel_requests had no callers and none of the code
deleted in this commit ever ran.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-2-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-03 17:42:26 +05:30
Thomas Huth
750cf86932 MAINTAINERS: Add an entry for the include/sysemu/rng*.h files
These headers are used by the virtio-rng and rng backends code,
so they should be listed in the same section in MAINTAINERS, too.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456404260-26928-1-git-send-email-thuth@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-03-03 17:42:23 +05:30
Peter Maydell
ed6128ebbd Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Tue 01 Mar 2016 15:48:04 GMT using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/tracing-pull-request:
  trace: Add a proper API to manage auto-generated events from the 'tcg' property
  trace: Add 'vcpu' event property to trace guest vCPU
  typedefs: Add CPUState
  trace: Add helper function to cast event arguments
  tcg: Move definition of type TCGv
  tcg: Add type for vCPU pointers
  trace: Remove unnecessary intermediate event copies
  trace: Extend API to manage event arguments
  vl: fix tracing initialization
  trace: use addresses instead of offsets in memory tracepoints
  trace: split subpage MMIOs into their own trace events.
  trace: docs: "simple" backend does support strings
  trace: drop trailing empty strings

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-01 15:54:03 +00:00
Lluís Vilanova
4ade0541de trace: Add a proper API to manage auto-generated events from the 'tcg' property
Formalizes the existence of the 'event_trans' and 'event_exec' event
attributes, which until now were monkey-patched only when necessary.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 145640558759.20978.6374959404425591089.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:34:38 +00:00
Lluís Vilanova
3d211d9f4d trace: Add 'vcpu' event property to trace guest vCPU
This property identifies events that trace vCPU-specific information.

It adds a "CPUState*" argument to events with the property, identifying
the vCPU raising the event. TCG translation events also have a
"TCGv_env" implicit argument that is later used as the "CPUState*"
argument at execution time.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 145641861797.30295.6991314023181842105.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:27:10 +00:00
Lluís Vilanova
b23197f9cf typedefs: Add CPUState
Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 145641861239.30295.8564457138934628740.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:27:09 +00:00
Lluís Vilanova
bc9beb47c7 trace: Add helper function to cast event arguments
Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 145641860680.30295.1873612736245870753.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:27:09 +00:00
Lluís Vilanova
5d4e1a1081 tcg: Move definition of type TCGv
The target-dependant type TCGv must be defined in "tcg/tcg.h" before
including the tracing helper wrappers in "tcg/tcg-op.h".

It also makes more sense to define it here, where other TCG types are
defined too.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 145641860129.30295.17554707227384022653.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:27:09 +00:00
Lluís Vilanova
1bcea73e13 tcg: Add type for vCPU pointers
Adds the 'TCGv_env' type for pointers to 'CPUArchState' objects. The
tracing infrastructure later needs to differentiate between regular
pointers and pointers to vCPUs.

Also changes all targets to use the new 'TCGv_env' type instead of the
generic 'TCGv_ptr'. As of now, the change is merely cosmetic ('TCGv_env'
translates into 'TCGv_ptr'), but that could change in the future to
enforce the difference.

Note that a 'TCGv_env' type (for 'CPUState') is not added, since all
helpers currently receive the architecture-specific
pointer ('CPUArchState').

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Acked-by: Richard Henderson <rth@twiddle.net>
Message-id: 145641859552.30295.7821536833590725201.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:27:09 +00:00
Lluís Vilanova
56797b1fbc trace: Remove unnecessary intermediate event copies
The current code forces the use of a chain of ".original" dereferences,
which looks odd.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 145641858988.30295.7223459456488075843.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:27:09 +00:00
Lluís Vilanova
3596f524d4 trace: Extend API to manage event arguments
Lets the user manage event arguments as a list, and simplifies argument
concatenation.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 145641858432.30295.3069911069472672646.stgit@localhost
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:27:09 +00:00
Denis V. Lunev
62cb4145bb vl: fix tracing initialization
we should call trace_init_backends() before trace_init_file() for
CONFIG_TRACE_SIMPLE There is no difference for other cases.

This problem was introduced by the commit
    commit 41fc57e44e
    Author: Paolo Bonzini <pbonzini@redhat.com>
    Date:   Thu Jan 7 16:55:24 2016 +0300

    trace: split trace_init_file out of trace_init_backends

'make check' was failed as a result if configured with
  --enable-trace-backends=simple

Spotted by Alex Bennée.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-id: 1455036545-14870-1-git-send-email-den@openvz.org
CC: Alex Bennée <alex.bennee@linaro.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:20:15 +00:00
Hollis Blanchard
4779dc1d19 trace: use addresses instead of offsets in memory tracepoints
When memory_region_ops tracepoints are enabled, calculate and record the
absolute address being accessed. Otherwise, we only get offsets into the
memory region instead of addresses.

[Fixed "offset" -> "addr" in trace event format strings.
--Stefan]

Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
Message-id: 1454976185-30095-3-git-send-email-hollis_blanchard@mentor.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:20:15 +00:00
Hollis Blanchard
23d92d68e7 trace: split subpage MMIOs into their own trace events.
Previously, a single MMIO could trigger the memory_region_ops tracepoint twice:
once on its way into subpage ops, then later on its way into the model's ops.

Also, the fields previously called "addr" are actually offsets into the memory
region. Rename them to "offset" while we're editing the tracepoint definitions.

Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
Message-id: 1454976185-30095-2-git-send-email-hollis_blanchard@mentor.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:20:15 +00:00
Hollis Blanchard
2c140f5f2c trace: docs: "simple" backend does support strings
The simple tracing backend has supported strings for more than three years
(62bab73213).

Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
Message-id: 1454976185-30095-1-git-send-email-hollis_blanchard@mentor.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:20:15 +00:00
Greg Kurz
6411dd1334 trace: drop trailing empty strings
Also fix a typo in the virtio_balloon_handle_output() trace while here.

[The double-quoting was a limitation of the old tracetool.sh script.
The modern tracetool.py script does not require double-quotes at the end
of the line.  See commit cf85cf8e97
("trace: Format strings must begin/end with double quotes").
--Stefan]

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20160111173036.24764.59878.stgit@bahia.huguette.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-03-01 13:20:15 +00:00
Peter Maydell
9c279bec75 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160301' into staging
Assorted fixes, cleanups and enhancements.

# gpg: Signature made Tue 01 Mar 2016 11:45:12 GMT using RSA key ID C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"

* remotes/cohuck/tags/s390x-20160301:
  s390x/css: only suspend when enabled by orb
  MAINTAINERS: Remove entry for hw/s390x/s390-virtio-bus.[ch]
  MAINTAINERS: Remove the old s390-virtio machine
  s390x/pci: use PCI_MSIX_FLAGS on retrieving the MSIX entries
  s390x/css: Use static initialization for channel_subsys fields
  s390x/css: Allocate channel_subsys statically
  s390x/pci: fix reg/dereg irq functions
  s390x/css: introduce indicator refcounting interfaces
  s390x/virtio: old machine leftovers
  watchdog/diag288: avoid race condition on expired watchdog
  s390x: remove {kvm_}s390_virtio_irq()
  s390x: fix debug statement in trigger_page_fault()
  s390x/kvm: sync fprs via kvm_run
  linux-headers: update against kvm/next

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-01 13:09:55 +00:00
Peter Maydell
646fd16865 Merge remote-tracking branch 'remotes/kraxel/tags/pull-seabios-20160301-1' into staging
seabios: update to 1.9.1 stable release

# gpg: Signature made Tue 01 Mar 2016 08:39:53 GMT using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-seabios-20160301-1:
  seabios: update to 1.9.1 stable release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-01 12:18:23 +00:00
Cornelia Huck
ce350f32e4 s390x/css: only suspend when enabled by orb
We must not allow a channel program to suspend if the suspend
control bit in the orb had not been specified.

Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:29 +01:00
Thomas Huth
d90527178c MAINTAINERS: Remove entry for hw/s390x/s390-virtio-bus.[ch]
The files have been deleted recently, no need to keep these entries
anymore.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1456397100-22746-1-git-send-email-thuth@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:29 +01:00
Thomas Huth
6aaa681c9b MAINTAINERS: Remove the old s390-virtio machine
The old s390-virtio machine has been removed last year, so we don't
need the corresponding section in the MAINTAINERS file anymore.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1456394274-21082-1-git-send-email-thuth@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:29 +01:00
Wei Yang
ce1307e180 s390x/pci: use PCI_MSIX_FLAGS on retrieving the MSIX entries
Even PCI_CAP_FLAGS has the same value as PCI_MSIX_FLAGS, the later one is
the more proper on retrieving MSIX entries.

This patch uses PCI_MSIX_FLAGS to retrieve the MSIX entries.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
CC: Cornelia Huck <cornelia.huck@de.ibm.com>
CC: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1455895091-7589-3-git-send-email-richard.weiyang@gmail.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:29 +01:00
Eduardo Habkost
bc994b74ea s390x/css: Use static initialization for channel_subsys fields
machine_init() will be gone, but we don't need it if we just
initialize the channel_subsys fields statically.

Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <1455656347-29033-4-git-send-email-ehabkost@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
[adapted on top of indicator changes]
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:29 +01:00
Eduardo Habkost
562f5e0b97 s390x/css: Allocate channel_subsys statically
There's no need to use g_malloc0() to allocate the channel_subsys
struct, just use a static variable.

Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <1455656347-29033-3-git-send-email-ehabkost@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
[adapted on top of indicator changes]
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:29 +01:00
Yi Min Zhao
8581c115d2 s390x/pci: fix reg/dereg irq functions
Indicator refcounting interfaces are introduced. This patch fixes
introducing unneeded indicator mappings and failure to release
AISB mappings on deregistration.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:29 +01:00
Yi Min Zhao
a28d8391e3 s390x/css: introduce indicator refcounting interfaces
Currently, virtio-ccw uses its own interfaces to keep indicators mapped
just once even if the same address has been registered multiple times.
These interfaces fit the PCI use case as well. Therefore, move them to
css and make them generic interfaces.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:28 +01:00
Cornelia Huck
99abd0d6f7 s390x/virtio: old machine leftovers
Remove some now unused #defines.

Reviewed-By: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:28 +01:00
Sascha Silbe
fe345a3d5d watchdog/diag288: avoid race condition on expired watchdog
When configured to inject an NMI, watchdog_perform_action() may cause
the BQL to be temporarily relinquished (inject_nmi() → ... →
s390_nmi() → s390_cpu_restart() → run_on_cpu()). When the guest issues
diag 288 again in response to the NMI, the diag 288 operation will
race against wdt_diag288_reset(). Depending on scheduler behaviour,
wdt_diag288_reset() may be run after the guest issued a diag 288
Init. As a result, we will cancel the timer the guest just set up. The
effect observed by the guest is that a second expiry does not trigger
the watchdog action and diag 288 Change operations fail.

Fix this by resetting the timer _before_ invoking the action.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Acked-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:28 +01:00
Cornelia Huck
8777f6abdb s390x: remove {kvm_}s390_virtio_irq()
This interface was only used by the old virtio machine and therefore
is not needed anymore.

Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:28 +01:00
David Hildenbrand
c5b2ee4c7a s390x: fix debug statement in trigger_page_fault()
When mmu_translate debugging output is enabled, code won't compile.
Let's just use the same statement as in trigger_prot_fault().

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:28 +01:00
David Hildenbrand
5ab0e547bf s390x/kvm: sync fprs via kvm_run
We can now also sync the fprs via kvm_run, avoiding one ioctl.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:28 +01:00
Cornelia Huck
66fb2d5467 linux-headers: update against kvm/next
Update against commit efef127c, but keep userfaultd.h.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-03-01 12:15:28 +01:00
Peter Maydell
0b85d73583 Merge remote-tracking branch 'remotes/kraxel/tags/pull-input-20160301-1' into staging
qapi: fix input-send-event and promote to stable

# gpg: Signature made Tue 01 Mar 2016 08:19:52 GMT using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-input-20160301-1:
  qapi: promote input-send-event to stable
  qapi: rename InputAxis values.
  qapi: rename input buttons
  qapi: switch x-input-send-event from console to device+head
  console: add & use qemu_console_lookup_by_device_name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-01 11:15:00 +00:00
Peter Maydell
d9c7737e57 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20160301-1' into staging
vga: minor cirrus/qxl bugfixes.

# gpg: Signature made Tue 01 Mar 2016 07:16:22 GMT using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-vga-20160301-1:
  qxl: lock current_async update in qxl_soft_reset
  cirrus_vga: fix off-by-one in blit_region_is_unsafe

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-01 10:34:19 +00:00
Peter Maydell
9c74a85304 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Mon 29 Feb 2016 20:08:16 GMT using RSA key ID C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"

* remotes/cody/tags/block-pull-request:
  iotests/124: Add cluster_size mismatch test
  block/backup: avoid copying less than full target clusters
  block/backup: make backup cluster size configurable
  mirror: Add mirror_wait_for_io
  mirror: Rewrite mirror_iteration
  vhdx: Simplify vhdx_set_shift_bits()
  vhdx: DIV_ROUND_UP() in vhdx_calc_bat_entries()
  iscsi: add support for getting CHAP password via QCryptoSecret API
  curl: add support for HTTP authentication parameters
  rbd: add support for getting password from QCryptoSecret object
  sheepdog: allow to delete snapshot
  block/nfs: add support for setting debug level

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-03-01 09:54:53 +00:00
Gerd Hoffmann
fee5b753ff seabios: update to 1.9.1 stable release
git shortlog rel-1.9.0..rel-1.9.1
=================================

Cole Robinson (1):
      biostables: Support SMBIOS 2.6+ UUID format

Kevin O'Connor (7):
      xhci: Check for device disconnects during USB2 reset polling
      xhci: Wait for port enable even for USB3 devices
      sdcard: Only enable error_irq_enable for bits defined in SDHCI v1 spec
      sdcard: fix typo causing 32bit write to 16bit block_size field
      nmi: Don't try to switch onto extra stack in NMI handler
      scsi: Do not call printf() from scsi_is_ready()
      coreboot: Check for unaligned cbfs header

Marcel Apfelbaum (1):
      fw/pci: do not automatically allocate IO region for PCIe bridges

Roger Pau Monne (1):
      build: fix typo in buildversion.py

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-01 09:37:07 +01:00
Gerd Hoffmann
05fa1c742f qxl: lock current_async update in qxl_soft_reset
This should fix a defect report from Coverity.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-01 07:51:32 +01:00
Paolo Bonzini
d2ba7ecb34 cirrus_vga: fix off-by-one in blit_region_is_unsafe
The "max" value is being compared with >=, but addr + width points to
the first byte that will _not_ be copied.  Laszlo suggested using a
"greater than" comparison, instead of subtracting one like it is
already done above for the height, so that max remains always positive.

The mistake is "safe"---it will reject some blits, but will never cause
out-of-bounds writes.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1455121059-18280-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-03-01 07:51:32 +01:00
John Snow
cc199b16cf iotests/124: Add cluster_size mismatch test
If a backing file isn't specified in the target image and the
cluster_size is larger than the bitmap granularity, we run the risk of
creating bitmaps with allocated clusters but empty/no data which will
prevent the proper reading of the backup in the future.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-4-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:55:14 -05:00
John Snow
4c9bca7e39 block/backup: avoid copying less than full target clusters
During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertantly create unusable incremental backup images.

For example:

If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.

This does not happen if the target image has a backing file that points
to the last known good backup.

Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.

The same logic applies to backup modes top, none, and full. Copying
fractional clusters without the guarantee of COW is dangerous, but even
if we can rely on COW, it's likely better to just re-copy the data.

Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-3-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:55:14 -05:00
John Snow
16096a4d47 block/backup: make backup cluster size configurable
64K might not always be appropriate, make this a runtime value.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-2-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:55:14 -05:00
Fam Zheng
21cd917ff5 mirror: Add mirror_wait_for_io
The three lines are duplicated a number of times now, refactor a
function.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 1454637630-10585-3-git-send-email-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:31 -05:00
Fam Zheng
e5b43573e2 mirror: Rewrite mirror_iteration
The "pnum < nb_sectors" condition in deciding whether to actually copy
data is unnecessarily strict, and the qiov initialization is
unnecessarily for bdrv_aio_write_zeroes and bdrv_aio_discard.

Rewrite mirror_iteration to fix both flaws.

The output of iotests 109 is updated because we now report the offset
and len slightly differently in mirroring progress.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 1454637630-10585-2-git-send-email-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:31 -05:00
Max Reitz
04a3615860 vhdx: Simplify vhdx_set_shift_bits()
For values which are powers of two (and we do assume all of these to
be), sizeof(x) * 8 - 1 - clz(x) == ctz(x). Therefore, use ctz().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 1450451066-13335-3-git-send-email-mreitz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:31 -05:00
Max Reitz
939901dcd2 vhdx: DIV_ROUND_UP() in vhdx_calc_bat_entries()
We have DIV_ROUND_UP(), so we can use it to produce more easily readable
code. It may be slower than the bit shifting currently performed
(because it actually performs a division), but since
vhdx_calc_bat_entries() is never used in a hot path, this is completely
fine.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 1450451066-13335-2-git-send-email-mreitz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:31 -05:00
Daniel P. Berrange
b189346eb1 iscsi: add support for getting CHAP password via QCryptoSecret API
The iSCSI driver currently accepts the CHAP password in plain text
as a block driver property. This change adds a new "password-secret"
property that accepts the ID of a QCryptoSecret instance.

  $QEMU \
     -object secret,id=sec0,filename=/home/berrange/example.pw \
     -drive driver=iscsi,url=iscsi://example.com/target-foo/lun1,\
            user=dan,password-secret=sec0

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1453385961-10718-4-git-send-email-berrange@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:31 -05:00
Daniel P. Berrange
1bff960642 curl: add support for HTTP authentication parameters
If connecting to a web server which has authentication
turned on, QEMU gets a 401 as curl has not been configured
with any authentication credentials.

This adds 4 new parameters to the curl block driver
options 'username', 'password-secret', 'proxy-username'
and 'proxy-password-secret'. Passwords are provided using
the recently added 'secret' object type

 $QEMU \
     -object secret,id=sec0,filename=/home/berrange/example.pw \
     -object secret,id=sec1,filename=/home/berrange/proxy.pw \
     -drive driver=http,url=http://example.com/some.img,\
            username=dan,password-secret=sec0,\
            proxy-username=dan,proxy-password-secret=sec1

Of course it is possible to use the same secret for both the
proxy & server passwords if desired, or omit the proxy auth
details, or the server auth details as required.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1453385961-10718-3-git-send-email-berrange@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:31 -05:00
Daniel P. Berrange
60390a2192 rbd: add support for getting password from QCryptoSecret object
Currently RBD passwords must be provided on the command line
via

  $QEMU -drive file=rbd:pool/image:id=myname:\
               key=QVFDVm41aE82SHpGQWhBQXEwTkN2OGp0SmNJY0UrSE9CbE1RMUE=:\
               auth_supported=cephx

This is insecure because the key is visible in the OS process
listing.

This adds support for an 'password-secret' parameter in the RBD
parameters that can be used with the QCryptoSecret object to
provide the password via a file:

  echo "QVFDVm41aE82SHpGQWhBQXEwTkN2OGp0SmNJY0UrSE9CbE1RMUE=" > poolkey.b64
  $QEMU -object secret,id=secret0,file=poolkey.b64,format=base64 \
        -drive driver=rbd,filename=rbd:pool/image:id=myname:\
               auth_supported=cephx,password-secret=secret0

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1453385961-10718-2-git-send-email-berrange@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:30 -05:00
Vasiliy Tolstov
eab8eb8db3 sheepdog: allow to delete snapshot
This patch implements a blockdriver function bdrv_snapshot_delete() in
the sheepdog driver. With the new function, snapshots of sheepdog can
be deleted from libvirt.

Cc: Jeff Cody <jcody@redhat.com>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru>
Message-id: 1450873346-22334-1-git-send-email-mitake.hitoshi@lab.ntt.co.jp
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:30 -05:00
Peter Lieven
7725b8bf12 block/nfs: add support for setting debug level
recent libnfs versions support logging debug messages. Add
support for it in qemu through an URL parameter.

Example:
 qemu -cdrom nfs://127.0.0.1/iso/my.iso?debug=2

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1447052973-14513-1-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-29 14:54:30 -05:00
459 changed files with 16404 additions and 5640 deletions

View File

@@ -234,6 +234,7 @@ L: kvm@vger.kernel.org
S: Supported
F: kvm-*
F: */kvm.*
F: include/sysemu/kvm*.h
ARM
M: Peter Maydell <peter.maydell@linaro.org>
@@ -656,12 +657,6 @@ F: hw/*/grlib*
S390 Machines
-------------
S390 Virtio
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: hw/s390x/s390-*.c
X: hw/s390x/*pci*.[hc]
S390 Virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
@@ -669,7 +664,6 @@ M: Alexander Graf <agraf@suse.de>
S: Supported
F: hw/char/sclp*.[hc]
F: hw/s390x/
X: hw/s390x/s390-virtio-bus.[ch]
F: include/hw/s390x/
F: pc-bios/s390-ccw/
F: hw/watchdog/wdt_diag288.c
@@ -723,6 +717,12 @@ F: hw/timer/hpet*
F: hw/timer/i8254*
F: hw/timer/mc146818rtc*
Machine core
M: Eduardo Habkost <ehabkost@redhat.com>
M: Marcel Apfelbaum <marcel@redhat.com>
S: Supported
F: hw/core/machine.c
F: include/hw/boards.h
Xtensa Machines
---------------
@@ -872,6 +872,7 @@ VFIO
M: Alex Williamson <alex.williamson@redhat.com>
S: Supported
F: hw/vfio/*
F: include/hw/vfio/
vhost
M: Michael S. Tsirkin <mst@redhat.com>
@@ -883,6 +884,7 @@ M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: hw/*/virtio*
F: net/vhost-user.c
F: include/hw/virtio/
virtio-9p
M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
@@ -928,6 +930,7 @@ M: Amit Shah <amit.shah@redhat.com>
S: Supported
F: hw/virtio/virtio-rng.c
F: include/hw/virtio/virtio-rng.h
F: include/sysemu/rng*.h
F: backends/rng*.c
nvme
@@ -1013,7 +1016,7 @@ F: blockjob.c
F: include/block/blockjob.h
F: block/backup.c
F: block/commit.c
F: block/stream.h
F: block/stream.c
F: block/mirror.c
T: git git://github.com/codyprime/qemu-kvm-jtc.git block
@@ -1128,6 +1131,7 @@ Network device backends
M: Jason Wang <jasowang@redhat.com>
S: Maintained
F: net/
F: include/net/
T: git git://github.com/jasowang/qemu.git net
Netmap network backend
@@ -1223,10 +1227,12 @@ F: scripts/qmp/
T: git git://repo.or.cz/qemu/armbru.git qapi-next
SLIRP
M: Samuel Thibault <samuel.thibault@ens-lyon.org>
M: Jan Kiszka <jan.kiszka@siemens.com>
S: Maintained
F: slirp/
F: net/slirp.c
F: include/net/slirp.h
T: git git://git.kiszka.org/qemu.git queues/slirp
Tracing

View File

@@ -238,7 +238,7 @@ qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-o
qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
@@ -329,7 +329,7 @@ ifneq ($(EXESUF),)
qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
endif
ivshmem-client$(EXESUF): $(ivshmem-client-obj-y)
ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)

View File

@@ -18,7 +18,7 @@
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
#ifdef CONFIG_EPOLL
#ifdef CONFIG_EPOLL_CREATE1
#include <sys/epoll.h>
#endif
@@ -33,7 +33,7 @@ struct AioHandler
QLIST_ENTRY(AioHandler) node;
};
#ifdef CONFIG_EPOLL
#ifdef CONFIG_EPOLL_CREATE1
/* The fd number threashold to switch to epoll */
#define EPOLL_ENABLE_THRESHOLD 64
@@ -483,7 +483,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
void aio_context_setup(AioContext *ctx, Error **errp)
{
#ifdef CONFIG_EPOLL
#ifdef CONFIG_EPOLL_CREATE1
assert(!ctx->epollfd);
ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
if (ctx->epollfd == -1) {

View File

@@ -567,7 +567,7 @@ static CharDriverState *chr_baum_init(const char *id,
ChardevReturn *ret,
Error **errp)
{
ChardevCommon *common = qapi_ChardevDummy_base(backend->u.braille);
ChardevCommon *common = backend->u.braille;
BaumDriverState *baum;
CharDriverState *chr;
brlapi_handle_t *handle;

View File

@@ -68,7 +68,7 @@ static CharDriverState *qemu_chr_open_msmouse(const char *id,
ChardevReturn *ret,
Error **errp)
{
ChardevCommon *common = qapi_ChardevDummy_base(backend->u.msmouse);
ChardevCommon *common = backend->u.msmouse;
CharDriverState *chr;
chr = qemu_chr_alloc(common, errp);

View File

@@ -25,33 +25,12 @@ typedef struct RngEgd
CharDriverState *chr;
char *chr_name;
GSList *requests;
} RngEgd;
typedef struct RngRequest
{
EntropyReceiveFunc *receive_entropy;
uint8_t *data;
void *opaque;
size_t offset;
size_t size;
} RngRequest;
static void rng_egd_request_entropy(RngBackend *b, size_t size,
EntropyReceiveFunc *receive_entropy,
void *opaque)
static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)
{
RngEgd *s = RNG_EGD(b);
RngRequest *req;
req = g_malloc(sizeof(*req));
req->offset = 0;
req->size = size;
req->receive_entropy = receive_entropy;
req->opaque = opaque;
req->data = g_malloc(req->size);
size_t size = req->size;
while (size > 0) {
uint8_t header[2];
@@ -65,24 +44,15 @@ static void rng_egd_request_entropy(RngBackend *b, size_t size,
size -= len;
}
s->requests = g_slist_append(s->requests, req);
}
static void rng_egd_free_request(RngRequest *req)
{
g_free(req->data);
g_free(req);
}
static int rng_egd_chr_can_read(void *opaque)
{
RngEgd *s = RNG_EGD(opaque);
GSList *i;
RngRequest *req;
int size = 0;
for (i = s->requests; i; i = i->next) {
RngRequest *req = i->data;
QSIMPLEQ_FOREACH(req, &s->parent.requests, next) {
size += req->size - req->offset;
}
@@ -94,8 +64,8 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
RngEgd *s = RNG_EGD(opaque);
size_t buf_offset = 0;
while (size > 0 && s->requests) {
RngRequest *req = s->requests->data;
while (size > 0 && !QSIMPLEQ_EMPTY(&s->parent.requests)) {
RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
int len = MIN(size, req->size - req->offset);
memcpy(req->data + req->offset, buf + buf_offset, len);
@@ -104,38 +74,13 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
size -= len;
if (req->offset == req->size) {
s->requests = g_slist_remove_link(s->requests, s->requests);
req->receive_entropy(req->opaque, req->data, req->size);
rng_egd_free_request(req);
rng_backend_finalize_request(&s->parent, req);
}
}
}
static void rng_egd_free_requests(RngEgd *s)
{
GSList *i;
for (i = s->requests; i; i = i->next) {
rng_egd_free_request(i->data);
}
g_slist_free(s->requests);
s->requests = NULL;
}
static void rng_egd_cancel_requests(RngBackend *b)
{
RngEgd *s = RNG_EGD(b);
/* We simply delete the list of pending requests. If there is data in the
* queue waiting to be read, this is okay, because there will always be
* more data than we requested originally
*/
rng_egd_free_requests(s);
}
static void rng_egd_opened(RngBackend *b, Error **errp)
{
RngEgd *s = RNG_EGD(b);
@@ -204,8 +149,6 @@ static void rng_egd_finalize(Object *obj)
}
g_free(s->chr_name);
rng_egd_free_requests(s);
}
static void rng_egd_class_init(ObjectClass *klass, void *data)
@@ -213,7 +156,6 @@ static void rng_egd_class_init(ObjectClass *klass, void *data)
RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);
rbc->request_entropy = rng_egd_request_entropy;
rbc->cancel_requests = rng_egd_cancel_requests;
rbc->opened = rng_egd_opened;
}

View File

@@ -22,10 +22,6 @@ struct RndRandom
int fd;
char *filename;
EntropyReceiveFunc *receive_func;
void *opaque;
size_t size;
};
/**
@@ -38,36 +34,35 @@ struct RndRandom
static void entropy_available(void *opaque)
{
RndRandom *s = RNG_RANDOM(opaque);
uint8_t buffer[s->size];
ssize_t len;
len = read(s->fd, buffer, s->size);
if (len < 0 && errno == EAGAIN) {
return;
while (!QSIMPLEQ_EMPTY(&s->parent.requests)) {
RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
ssize_t len;
len = read(s->fd, req->data, req->size);
if (len < 0 && errno == EAGAIN) {
return;
}
g_assert(len != -1);
req->receive_entropy(req->opaque, req->data, len);
rng_backend_finalize_request(&s->parent, req);
}
g_assert(len != -1);
s->receive_func(s->opaque, buffer, len);
s->receive_func = NULL;
/* We've drained all requests, the fd handler can be reset. */
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
}
static void rng_random_request_entropy(RngBackend *b, size_t size,
EntropyReceiveFunc *receive_entropy,
void *opaque)
static void rng_random_request_entropy(RngBackend *b, RngRequest *req)
{
RndRandom *s = RNG_RANDOM(b);
if (s->receive_func) {
s->receive_func(s->opaque, NULL, 0);
if (QSIMPLEQ_EMPTY(&s->parent.requests)) {
/* If there are no pending requests yet, we need to
* install our fd handler. */
qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
}
s->receive_func = receive_entropy;
s->opaque = opaque;
s->size = size;
qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
}
static void rng_random_opened(RngBackend *b, Error **errp)

View File

@@ -20,18 +20,20 @@ void rng_backend_request_entropy(RngBackend *s, size_t size,
void *opaque)
{
RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
RngRequest *req;
if (k->request_entropy) {
k->request_entropy(s, size, receive_entropy, opaque);
}
}
req = g_malloc(sizeof(*req));
void rng_backend_cancel_requests(RngBackend *s)
{
RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
req->offset = 0;
req->size = size;
req->receive_entropy = receive_entropy;
req->opaque = opaque;
req->data = g_malloc(req->size);
if (k->cancel_requests) {
k->cancel_requests(s);
k->request_entropy(s, req);
QSIMPLEQ_INSERT_TAIL(&s->requests, req, next);
}
}
@@ -73,14 +75,48 @@ static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
s->opened = true;
}
static void rng_backend_free_request(RngRequest *req)
{
g_free(req->data);
g_free(req);
}
static void rng_backend_free_requests(RngBackend *s)
{
RngRequest *req, *next;
QSIMPLEQ_FOREACH_SAFE(req, &s->requests, next, next) {
rng_backend_free_request(req);
}
QSIMPLEQ_INIT(&s->requests);
}
void rng_backend_finalize_request(RngBackend *s, RngRequest *req)
{
QSIMPLEQ_REMOVE(&s->requests, req, RngRequest, next);
rng_backend_free_request(req);
}
static void rng_backend_init(Object *obj)
{
RngBackend *s = RNG_BACKEND(obj);
QSIMPLEQ_INIT(&s->requests);
object_property_add_bool(obj, "opened",
rng_backend_prop_get_opened,
rng_backend_prop_set_opened,
NULL);
}
static void rng_backend_finalize(Object *obj)
{
RngBackend *s = RNG_BACKEND(obj);
rng_backend_free_requests(s);
}
static void rng_backend_class_init(ObjectClass *oc, void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
@@ -93,6 +129,7 @@ static const TypeInfo rng_backend_info = {
.parent = TYPE_OBJECT,
.instance_size = sizeof(RngBackend),
.instance_init = rng_backend_init,
.instance_finalize = rng_backend_finalize,
.class_size = sizeof(RngBackendClass),
.class_init = rng_backend_class_init,
.abstract = true,

529
block.c
View File

@@ -53,27 +53,8 @@
#include <windows.h>
#endif
/**
* A BdrvDirtyBitmap can be in three possible states:
* (1) successor is NULL and disabled is false: full r/w mode
* (2) successor is NULL and disabled is true: read only mode ("disabled")
* (3) successor is set: frozen mode.
* A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
* or enabled. A frozen bitmap can only abdicate() or reclaim().
*/
struct BdrvDirtyBitmap {
HBitmap *bitmap; /* Dirty sector bitmap implementation */
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
char *name; /* Optional non-empty unique ID */
int64_t size; /* Size of the bitmap (Number of sectors) */
bool disabled; /* Bitmap is read-only */
QLIST_ENTRY(BdrvDirtyBitmap) list;
};
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
@@ -88,9 +69,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
BlockDriverState *parent,
const BdrvChildRole *child_role, Error **errp);
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
@@ -246,10 +224,7 @@ void bdrv_register(BlockDriver *bdrv)
BlockDriverState *bdrv_new_root(void)
{
BlockDriverState *bs = bdrv_new();
QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
return bs;
return bdrv_new();
}
BlockDriverState *bdrv_new(void)
@@ -687,13 +662,19 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
}
/*
* Returns the flags that a temporary snapshot should get, based on the
* originally requested flags (the originally requested image will have flags
* like a backing file)
* Returns the options and flags that a temporary snapshot should get, based on
* the originally requested flags (the originally requested image will have
* flags like a backing file)
*/
static int bdrv_temp_snapshot_flags(int flags)
static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options)
{
return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
*child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
/* For temporary files, unconditional cache=unsafe is fine */
qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
}
/*
@@ -1194,10 +1175,9 @@ static int bdrv_fill_options(QDict **options, const char *filename,
return 0;
}
static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role)
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role)
{
BdrvChild *child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
@@ -1206,24 +1186,43 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
.role = child_role,
};
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
return child;
}
static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role)
{
BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
return child;
}
static void bdrv_detach_child(BdrvChild *child)
{
QLIST_REMOVE(child, next);
if (child->next.le_prev) {
QLIST_REMOVE(child, next);
child->next.le_prev = NULL;
}
QLIST_REMOVE(child, next_parent);
g_free(child->name);
g_free(child);
}
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
void bdrv_root_unref_child(BdrvChild *child)
{
BlockDriverState *child_bs;
child_bs = child->bs;
bdrv_detach_child(child);
bdrv_unref(child_bs);
}
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{
if (child == NULL) {
return;
}
@@ -1232,9 +1231,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
child->bs->inherits_from = NULL;
}
child_bs = child->bs;
bdrv_detach_child(child);
bdrv_unref(child_bs);
bdrv_root_unref_child(child);
}
/*
@@ -1424,13 +1421,13 @@ done:
return c;
}
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
QDict *snapshot_options, Error **errp)
{
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char *tmp_filename = g_malloc0(PATH_MAX + 1);
int64_t total_size;
QemuOpts *opts = NULL;
QDict *snapshot_options;
BlockDriverState *bs_snapshot;
Error *local_err = NULL;
int ret;
@@ -1464,8 +1461,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
goto out;
}
/* Prepare a new options QDict for the temporary file */
snapshot_options = qdict_new();
/* Prepare options QDict for the temporary file */
qdict_put(snapshot_options, "file.driver",
qstring_from_str("file"));
qdict_put(snapshot_options, "file.filename",
@@ -1477,6 +1473,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
flags, &local_err);
snapshot_options = NULL;
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
@@ -1485,6 +1482,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
bdrv_append(bs_snapshot, bs);
out:
QDECREF(snapshot_options);
g_free(tmp_filename);
return ret;
}
@@ -1516,6 +1514,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
const char *drvname;
const char *backing;
Error *local_err = NULL;
QDict *snapshot_options = NULL;
int snapshot_flags = 0;
assert(pbs);
@@ -1607,7 +1606,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
flags |= BDRV_O_ALLOW_RDWR;
}
if (flags & BDRV_O_SNAPSHOT) {
snapshot_flags = bdrv_temp_snapshot_flags(flags);
snapshot_options = qdict_new();
bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
flags, options);
bdrv_backing_options(&flags, options, flags, options);
}
@@ -1681,9 +1682,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
error_setg(errp, "Block protocol '%s' doesn't support the option "
"'%s'", drv->format_name, entry->key);
} else {
error_setg(errp, "Block format '%s' used by device '%s' doesn't "
"support the option '%s'", drv->format_name,
bdrv_get_device_name(bs), entry->key);
error_setg(errp,
"Block format '%s' does not support the option '%s'",
drv->format_name, entry->key);
}
ret = -EINVAL;
@@ -1709,7 +1710,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
* temporary snapshot afterwards. */
if (snapshot_flags) {
ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
&local_err);
snapshot_options = NULL;
if (local_err) {
goto close_and_fail;
}
@@ -1721,6 +1724,7 @@ fail:
if (file != NULL) {
bdrv_unref_child(bs, file);
}
QDECREF(snapshot_options);
QDECREF(bs->explicit_options);
QDECREF(bs->options);
QDECREF(options);
@@ -1743,6 +1747,7 @@ close_and_fail:
} else {
bdrv_unref(bs);
}
QDECREF(snapshot_options);
QDECREF(options);
if (local_err) {
error_propagate(errp, local_err);
@@ -2236,26 +2241,10 @@ void bdrv_close_all(void)
}
}
/* Note that bs->device_list.tqe_prev is initially null,
* and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
* the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
* resetting it to null on remove. */
void bdrv_device_remove(BlockDriverState *bs)
{
QTAILQ_REMOVE(&bdrv_states, bs, device_list);
bs->device_list.tqe_prev = NULL;
}
/* make a BlockDriverState anonymous by removing from bdrv_state and
* graph_bdrv_state list.
Also, NULL terminate the device_name to prevent double remove */
/* make a BlockDriverState anonymous by removing from graph_bdrv_state list.
* Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
/* Take care to remove bs from bdrv_states only when it's actually
* in it. */
if (bs->device_list.tqe_prev) {
bdrv_device_remove(bs);
}
if (bs->node_name[0] != '\0') {
QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
}
@@ -2282,6 +2271,14 @@ static void change_parent_backing_link(BlockDriverState *from,
{
BdrvChild *c, *next;
if (from->blk) {
/* FIXME We bypass blk_set_bs(), so we need to make these updates
* manually. The root problem is not in this change function, but the
* existence of BlockDriverState.blk. */
to->blk = from->blk;
from->blk = NULL;
}
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->role != &child_backing);
c->bs = to;
@@ -2290,13 +2287,6 @@ static void change_parent_backing_link(BlockDriverState *from,
bdrv_ref(to);
bdrv_unref(from);
}
if (from->blk) {
blk_set_bs(from->blk, to);
if (!to->device_list.tqe_prev) {
QTAILQ_INSERT_BEFORE(from, to, device_list);
}
bdrv_device_remove(from);
}
}
static void swap_feature_fields(BlockDriverState *bs_top,
@@ -2527,26 +2517,6 @@ ro_cleanup:
return ret;
}
int bdrv_commit_all(void)
{
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (bs->drv && bs->backing) {
int ret = bdrv_commit(bs);
if (ret < 0) {
aio_context_release(aio_context);
return ret;
}
}
aio_context_release(aio_context);
}
return 0;
}
/*
* Return values:
* 0 - success
@@ -2995,12 +2965,23 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
return QTAILQ_NEXT(bs, node_list);
}
/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
* the monitor or attached to a BlockBackend */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
if (!bs) {
return QTAILQ_FIRST(&bdrv_states);
if (!bs || bs->blk) {
bs = blk_next_root_bs(bs);
if (bs) {
return bs;
}
}
return QTAILQ_NEXT(bs, device_list);
/* Ignore all BDSs that are attached to a BlockBackend here; they have been
* handled by the above block already */
do {
bs = bdrv_next_monitor_owned(bs);
} while (bs && bs->blk);
return bs;
}
const char *bdrv_get_node_name(const BlockDriverState *bs)
@@ -3308,10 +3289,10 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
void bdrv_invalidate_cache_all(Error **errp)
{
BlockDriverState *bs;
BlockDriverState *bs = NULL;
Error *local_err = NULL;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
while ((bs = bdrv_next(bs)) != NULL) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
@@ -3341,10 +3322,10 @@ static int bdrv_inactivate(BlockDriverState *bs)
int bdrv_inactivate_all(void)
{
BlockDriverState *bs;
BlockDriverState *bs = NULL;
int ret;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
while ((bs = bdrv_next(bs)) != NULL) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
@@ -3431,346 +3412,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
}
}
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
{
BdrvDirtyBitmap *bm;
assert(name);
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
if (bm->name && !strcmp(name, bm->name)) {
return bm;
}
}
return NULL;
}
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
g_free(bitmap->name);
bitmap->name = NULL;
}
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
uint32_t granularity,
const char *name,
Error **errp)
{
int64_t bitmap_size;
BdrvDirtyBitmap *bitmap;
uint32_t sector_granularity;
assert((granularity & (granularity - 1)) == 0);
if (name && bdrv_find_dirty_bitmap(bs, name)) {
error_setg(errp, "Bitmap already exists: %s", name);
return NULL;
}
sector_granularity = granularity >> BDRV_SECTOR_BITS;
assert(sector_granularity);
bitmap_size = bdrv_nb_sectors(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
return NULL;
}
bitmap = g_new0(BdrvDirtyBitmap, 1);
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
bitmap->size = bitmap_size;
bitmap->name = g_strdup(name);
bitmap->disabled = false;
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
return bitmap;
}
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
return bitmap->successor;
}
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
return !(bitmap->disabled || bitmap->successor);
}
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
{
if (bdrv_dirty_bitmap_frozen(bitmap)) {
return DIRTY_BITMAP_STATUS_FROZEN;
} else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
return DIRTY_BITMAP_STATUS_DISABLED;
} else {
return DIRTY_BITMAP_STATUS_ACTIVE;
}
}
/**
* Create a successor bitmap destined to replace this bitmap after an operation.
* Requires that the bitmap is not frozen and has no successor.
*/
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, Error **errp)
{
uint64_t granularity;
BdrvDirtyBitmap *child;
if (bdrv_dirty_bitmap_frozen(bitmap)) {
error_setg(errp, "Cannot create a successor for a bitmap that is "
"currently frozen");
return -1;
}
assert(!bitmap->successor);
/* Create an anonymous successor */
granularity = bdrv_dirty_bitmap_granularity(bitmap);
child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!child) {
return -1;
}
/* Successor will be on or off based on our current state. */
child->disabled = bitmap->disabled;
/* Install the successor and freeze the parent */
bitmap->successor = child;
return 0;
}
/**
* For a bitmap with a successor, yield our name to the successor,
* delete the old bitmap, and return a handle to the new bitmap.
*/
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp)
{
char *name;
BdrvDirtyBitmap *successor = bitmap->successor;
if (successor == NULL) {
error_setg(errp, "Cannot relinquish control if "
"there's no successor present");
return NULL;
}
name = bitmap->name;
bitmap->name = NULL;
successor->name = name;
bitmap->successor = NULL;
bdrv_release_dirty_bitmap(bs, bitmap);
return successor;
}
/**
* In cases of failure where we can no longer safely delete the parent,
* we may wish to re-join the parent and child/successor.
* The merged parent will be un-frozen, but not explicitly re-enabled.
*/
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *parent,
Error **errp)
{
BdrvDirtyBitmap *successor = parent->successor;
if (!successor) {
error_setg(errp, "Cannot reclaim a successor when none is present");
return NULL;
}
if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
error_setg(errp, "Merging of parent and successor bitmap failed");
return NULL;
}
bdrv_release_dirty_bitmap(bs, successor);
parent->successor = NULL;
return parent;
}
/**
* Truncates _all_ bitmaps attached to a BDS.
*/
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
{
BdrvDirtyBitmap *bitmap;
uint64_t size = bdrv_nb_sectors(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
assert(!bdrv_dirty_bitmap_frozen(bitmap));
hbitmap_truncate(bitmap->bitmap, size);
bitmap->size = size;
}
}
static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
bool only_named)
{
BdrvDirtyBitmap *bm, *next;
QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
assert(!bdrv_dirty_bitmap_frozen(bm));
QLIST_REMOVE(bm, list);
hbitmap_free(bm->bitmap);
g_free(bm->name);
g_free(bm);
if (bitmap) {
return;
}
}
}
}
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
}
/**
* Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
* There must not be any frozen bitmaps attached.
*/
static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
{
bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
}
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = true;
}
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = false;
}
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
BdrvDirtyBitmap *bm;
BlockDirtyInfoList *list = NULL;
BlockDirtyInfoList **plist = &list;
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
info->count = bdrv_get_dirty_count(bm);
info->granularity = bdrv_dirty_bitmap_granularity(bm);
info->has_name = !!bm->name;
info->name = g_strdup(bm->name);
info->status = bdrv_dirty_bitmap_status(bm);
entry->value = info;
*plist = entry;
plist = &entry->next;
}
return list;
}
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
{
if (bitmap) {
return hbitmap_get(bitmap->bitmap, sector);
} else {
return 0;
}
}
/**
* Chooses a default granularity based on the existing cluster size,
* but clamped between [4K, 64K]. Defaults to 64K in the case that there
* is no cluster size information available.
*/
uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
{
BlockDriverInfo bdi;
uint32_t granularity;
if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
granularity = MAX(4096, bdi.cluster_size);
granularity = MIN(65536, granularity);
} else {
granularity = 65536;
}
return granularity;
}
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
if (!out) {
hbitmap_reset_all(bitmap->bitmap);
} else {
HBitmap *backup = bitmap->bitmap;
bitmap->bitmap = hbitmap_alloc(bitmap->size,
hbitmap_granularity(backup));
*out = backup;
}
}
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
{
HBitmap *tmp = bitmap->bitmap;
assert(bdrv_dirty_bitmap_enabled(bitmap));
bitmap->bitmap = in;
hbitmap_free(tmp);
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
{
BdrvDirtyBitmap *bitmap;
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
if (!bdrv_dirty_bitmap_enabled(bitmap)) {
continue;
}
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
}
/**
* Advance an HBitmapIter to an arbitrary offset.
*/
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
assert(hbi->hb);
hbitmap_iter_init(hbi, hbi->hb, offset);
}
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
return hbitmap_count(bitmap->bitmap);
}
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
@@ -4190,10 +3831,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
*/
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
{
BlockDriverState *bs;
BlockDriverState *bs = NULL;
/* walk down the bs forest recursively */
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
while ((bs = bdrv_next(bs)) != NULL) {
bool perm;
/* try to recurse in this top level bs */

View File

@@ -20,7 +20,7 @@ block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
block-obj-y += accounting.o
block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
common-obj-y += stream.o

View File

@@ -20,11 +20,9 @@
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
#include "qemu/bitmap.h"
#define BACKUP_CLUSTER_BITS 16
#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct CowRequest {
@@ -45,10 +43,17 @@ typedef struct BackupBlockJob {
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
HBitmap *bitmap;
unsigned long *done_bitmap;
int64_t cluster_size;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
/* Size of a cluster in sectors, instead of bytes. */
static inline int64_t cluster_size_sectors(BackupBlockJob *job)
{
return job->cluster_size / BDRV_SECTOR_SIZE;
}
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
@@ -97,13 +102,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
int64_t sectors_per_cluster = cluster_size_sectors(job);
int64_t start, end;
int n;
qemu_co_rwlock_rdlock(&job->flush_rwlock);
start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
start = sector_num / sectors_per_cluster;
end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
@@ -111,19 +117,19 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
if (hbitmap_get(job->bitmap, start)) {
if (test_bit(start, job->done_bitmap)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
trace_backup_do_cow_process(job, start);
n = MIN(BACKUP_SECTORS_PER_CLUSTER,
n = MIN(sectors_per_cluster,
job->common.len / BDRV_SECTOR_SIZE -
start * BACKUP_SECTORS_PER_CLUSTER);
start * sectors_per_cluster);
if (!bounce_buffer) {
bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
bounce_buffer = qemu_blockalign(bs, job->cluster_size);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
@@ -131,10 +137,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (is_write_notifier) {
ret = bdrv_co_readv_no_serialising(bs,
start * BACKUP_SECTORS_PER_CLUSTER,
start * sectors_per_cluster,
n, &bounce_qiov);
} else {
ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
&bounce_qiov);
}
if (ret < 0) {
@@ -147,11 +153,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
start * BACKUP_SECTORS_PER_CLUSTER,
start * sectors_per_cluster,
n, BDRV_REQ_MAY_UNMAP);
} else {
ret = bdrv_co_writev(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n,
start * sectors_per_cluster, n,
&bounce_qiov);
}
if (ret < 0) {
@@ -162,7 +168,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
goto out;
}
hbitmap_set(job->bitmap, start, 1);
set_bit(start, job->done_bitmap);
/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
@@ -322,21 +328,22 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
int64_t cluster;
int64_t end;
int64_t last_cluster = -1;
int64_t sectors_per_cluster = cluster_size_sectors(job);
BlockDriverState *bs = job->common.bs;
HBitmapIter hbi;
granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
clusters_per_iter = MAX((granularity / job->cluster_size), 1);
bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
/* Find the next dirty sector(s) */
while ((sector = hbitmap_iter_next(&hbi)) != -1) {
cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
cluster = sector / sectors_per_cluster;
/* Fake progress updates for any clusters we skipped */
if (cluster != last_cluster + 1) {
job->common.offset += ((cluster - last_cluster - 1) *
BACKUP_CLUSTER_SIZE);
job->cluster_size);
}
for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
@@ -344,8 +351,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
if (yield_and_check(job)) {
return ret;
}
ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
ret = backup_do_cow(bs, cluster * sectors_per_cluster,
sectors_per_cluster, &error_is_read,
false);
if ((ret < 0) &&
backup_error_action(job, error_is_read, -ret) ==
@@ -357,17 +364,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* If the bitmap granularity is smaller than the backup granularity,
* we need to advance the iterator pointer to the next cluster. */
if (granularity < BACKUP_CLUSTER_SIZE) {
bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
if (granularity < job->cluster_size) {
bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
}
last_cluster = cluster - 1;
}
/* Play some final catchup with the progress meter */
end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
end = DIV_ROUND_UP(job->common.len, job->cluster_size);
if (last_cluster + 1 < end) {
job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
}
return ret;
@@ -384,15 +391,16 @@ static void coroutine_fn backup_run(void *opaque)
.notify = backup_before_write_notify,
};
int64_t start, end;
int64_t sectors_per_cluster = cluster_size_sectors(job);
int ret = 0;
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
start = 0;
end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
end = DIV_ROUND_UP(job->common.len, job->cluster_size);
job->bitmap = hbitmap_alloc(end, 0);
job->done_bitmap = bitmap_new(end);
bdrv_set_enable_write_cache(target, true);
if (target->blk) {
@@ -427,7 +435,7 @@ static void coroutine_fn backup_run(void *opaque)
/* Check to see if these blocks are already in the
* backing file. */
for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
for (i = 0; i < sectors_per_cluster;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
@@ -436,8 +444,8 @@ static void coroutine_fn backup_run(void *opaque)
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
start * BACKUP_SECTORS_PER_CLUSTER + i,
BACKUP_SECTORS_PER_CLUSTER - i, &n);
start * sectors_per_cluster + i,
sectors_per_cluster - i, &n);
i += n;
if (alloced == 1 || n == 0) {
@@ -452,8 +460,8 @@ static void coroutine_fn backup_run(void *opaque)
}
}
/* FULL sync mode we copy the whole drive. */
ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
ret = backup_do_cow(bs, start * sectors_per_cluster,
sectors_per_cluster, &error_is_read, false);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
@@ -473,7 +481,7 @@ static void coroutine_fn backup_run(void *opaque)
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
hbitmap_free(job->bitmap);
g_free(job->done_bitmap);
if (target->blk) {
blk_iostatus_disable(target->blk);
@@ -494,6 +502,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
BlockJobTxn *txn, Error **errp)
{
int64_t len;
BlockDriverInfo bdi;
int ret;
assert(bs);
assert(target);
@@ -563,14 +573,32 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
goto error;
}
bdrv_op_block_all(target, job->common.blocker);
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
job->target = target;
job->sync_mode = sync_mode;
job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
/* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for
* targets with a backing file, try to avoid COW if possible. */
ret = bdrv_get_info(job->target, &bdi);
if (ret < 0 && !target->backing) {
error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, "
"which has no backing file");
error_append_hint(errp,
"Aborting, since this may create an unusable destination image\n");
goto error;
} else if (ret < 0 && target->backing) {
/* Not fatal; just trudge on ahead. */
job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
} else {
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
bdrv_op_block_all(target, job->common.blocker);
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
block_job_txn_add_job(txn, &job->common);

File diff suppressed because it is too large Load Diff

View File

@@ -27,6 +27,7 @@
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qstring.h"
#include "crypto/secret.h"
#include <curl/curl.h>
// #define DEBUG_CURL
@@ -78,6 +79,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
#define CURL_BLOCK_OPT_TIMEOUT "timeout"
#define CURL_BLOCK_OPT_COOKIE "cookie"
#define CURL_BLOCK_OPT_USERNAME "username"
#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
struct BDRVCURLState;
@@ -120,6 +125,10 @@ typedef struct BDRVCURLState {
char *cookie;
bool accept_range;
AioContext *aio_context;
char *username;
char *password;
char *proxyusername;
char *proxypassword;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
@@ -419,6 +428,21 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
if (s->username) {
curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
}
if (s->password) {
curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
}
if (s->proxyusername) {
curl_easy_setopt(state->curl,
CURLOPT_PROXYUSERNAME, s->proxyusername);
}
if (s->proxypassword) {
curl_easy_setopt(state->curl,
CURLOPT_PROXYPASSWORD, s->proxypassword);
}
/* Restrict supported protocols to avoid security issues in the more
* obscure protocols. For example, do not allow POP3/SMTP/IMAP see
* CVE-2013-0249.
@@ -525,10 +549,31 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Pass the cookie or list of cookies with each request"
},
{
.name = CURL_BLOCK_OPT_USERNAME,
.type = QEMU_OPT_STRING,
.help = "Username for HTTP auth"
},
{
.name = CURL_BLOCK_OPT_PASSWORD_SECRET,
.type = QEMU_OPT_STRING,
.help = "ID of secret used as password for HTTP auth",
},
{
.name = CURL_BLOCK_OPT_PROXY_USERNAME,
.type = QEMU_OPT_STRING,
.help = "Username for HTTP proxy auth"
},
{
.name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
.type = QEMU_OPT_STRING,
.help = "ID of secret used as password for HTTP proxy auth",
},
{ /* end of list */ }
},
};
static int curl_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -539,6 +584,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
const char *file;
const char *cookie;
double d;
const char *secretid;
static int inited = 0;
@@ -580,6 +626,26 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
goto out_noclean;
}
s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
if (secretid) {
s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
if (!s->password) {
goto out_noclean;
}
}
s->proxyusername = g_strdup(
qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
if (secretid) {
s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
if (!s->proxypassword) {
goto out_noclean;
}
}
if (!inited) {
curl_global_init(CURL_GLOBAL_ALL);
inited = 1;

387
block/dirty-bitmap.c Normal file
View File

@@ -0,0 +1,387 @@
/*
* Block Dirty Bitmap
*
* Copyright (c) 2016 Red Hat. Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
/**
* A BdrvDirtyBitmap can be in three possible states:
* (1) successor is NULL and disabled is false: full r/w mode
* (2) successor is NULL and disabled is true: read only mode ("disabled")
* (3) successor is set: frozen mode.
* A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
* or enabled. A frozen bitmap can only abdicate() or reclaim().
*/
struct BdrvDirtyBitmap {
HBitmap *bitmap; /* Dirty sector bitmap implementation */
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
char *name; /* Optional non-empty unique ID */
int64_t size; /* Size of the bitmap (Number of sectors) */
bool disabled; /* Bitmap is read-only */
QLIST_ENTRY(BdrvDirtyBitmap) list;
};
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
{
BdrvDirtyBitmap *bm;
assert(name);
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
if (bm->name && !strcmp(name, bm->name)) {
return bm;
}
}
return NULL;
}
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
g_free(bitmap->name);
bitmap->name = NULL;
}
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
uint32_t granularity,
const char *name,
Error **errp)
{
int64_t bitmap_size;
BdrvDirtyBitmap *bitmap;
uint32_t sector_granularity;
assert((granularity & (granularity - 1)) == 0);
if (name && bdrv_find_dirty_bitmap(bs, name)) {
error_setg(errp, "Bitmap already exists: %s", name);
return NULL;
}
sector_granularity = granularity >> BDRV_SECTOR_BITS;
assert(sector_granularity);
bitmap_size = bdrv_nb_sectors(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
return NULL;
}
bitmap = g_new0(BdrvDirtyBitmap, 1);
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
bitmap->size = bitmap_size;
bitmap->name = g_strdup(name);
bitmap->disabled = false;
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
return bitmap;
}
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
return bitmap->successor;
}
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
return !(bitmap->disabled || bitmap->successor);
}
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
{
if (bdrv_dirty_bitmap_frozen(bitmap)) {
return DIRTY_BITMAP_STATUS_FROZEN;
} else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
return DIRTY_BITMAP_STATUS_DISABLED;
} else {
return DIRTY_BITMAP_STATUS_ACTIVE;
}
}
/**
* Create a successor bitmap destined to replace this bitmap after an operation.
* Requires that the bitmap is not frozen and has no successor.
*/
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, Error **errp)
{
uint64_t granularity;
BdrvDirtyBitmap *child;
if (bdrv_dirty_bitmap_frozen(bitmap)) {
error_setg(errp, "Cannot create a successor for a bitmap that is "
"currently frozen");
return -1;
}
assert(!bitmap->successor);
/* Create an anonymous successor */
granularity = bdrv_dirty_bitmap_granularity(bitmap);
child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!child) {
return -1;
}
/* Successor will be on or off based on our current state. */
child->disabled = bitmap->disabled;
/* Install the successor and freeze the parent */
bitmap->successor = child;
return 0;
}
/**
* For a bitmap with a successor, yield our name to the successor,
* delete the old bitmap, and return a handle to the new bitmap.
*/
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp)
{
char *name;
BdrvDirtyBitmap *successor = bitmap->successor;
if (successor == NULL) {
error_setg(errp, "Cannot relinquish control if "
"there's no successor present");
return NULL;
}
name = bitmap->name;
bitmap->name = NULL;
successor->name = name;
bitmap->successor = NULL;
bdrv_release_dirty_bitmap(bs, bitmap);
return successor;
}
/**
* In cases of failure where we can no longer safely delete the parent,
* we may wish to re-join the parent and child/successor.
* The merged parent will be un-frozen, but not explicitly re-enabled.
*/
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *parent,
Error **errp)
{
BdrvDirtyBitmap *successor = parent->successor;
if (!successor) {
error_setg(errp, "Cannot reclaim a successor when none is present");
return NULL;
}
if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
error_setg(errp, "Merging of parent and successor bitmap failed");
return NULL;
}
bdrv_release_dirty_bitmap(bs, successor);
parent->successor = NULL;
return parent;
}
/**
* Truncates _all_ bitmaps attached to a BDS.
*/
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
{
BdrvDirtyBitmap *bitmap;
uint64_t size = bdrv_nb_sectors(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
assert(!bdrv_dirty_bitmap_frozen(bitmap));
hbitmap_truncate(bitmap->bitmap, size);
bitmap->size = size;
}
}
static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
bool only_named)
{
BdrvDirtyBitmap *bm, *next;
QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
assert(!bdrv_dirty_bitmap_frozen(bm));
QLIST_REMOVE(bm, list);
hbitmap_free(bm->bitmap);
g_free(bm->name);
g_free(bm);
if (bitmap) {
return;
}
}
}
}
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
}
/**
* Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
* There must not be any frozen bitmaps attached.
*/
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
{
bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
}
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = true;
}
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = false;
}
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
BdrvDirtyBitmap *bm;
BlockDirtyInfoList *list = NULL;
BlockDirtyInfoList **plist = &list;
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
info->count = bdrv_get_dirty_count(bm);
info->granularity = bdrv_dirty_bitmap_granularity(bm);
info->has_name = !!bm->name;
info->name = g_strdup(bm->name);
info->status = bdrv_dirty_bitmap_status(bm);
entry->value = info;
*plist = entry;
plist = &entry->next;
}
return list;
}
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
int64_t sector)
{
if (bitmap) {
return hbitmap_get(bitmap->bitmap, sector);
} else {
return 0;
}
}
/**
* Chooses a default granularity based on the existing cluster size,
* but clamped between [4K, 64K]. Defaults to 64K in the case that there
* is no cluster size information available.
*/
uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
{
BlockDriverInfo bdi;
uint32_t granularity;
if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
granularity = MAX(4096, bdi.cluster_size);
granularity = MIN(65536, granularity);
} else {
granularity = 65536;
}
return granularity;
}
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
if (!out) {
hbitmap_reset_all(bitmap->bitmap);
} else {
HBitmap *backup = bitmap->bitmap;
bitmap->bitmap = hbitmap_alloc(bitmap->size,
hbitmap_granularity(backup));
*out = backup;
}
}
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
{
HBitmap *tmp = bitmap->bitmap;
assert(bdrv_dirty_bitmap_enabled(bitmap));
bitmap->bitmap = in;
hbitmap_free(tmp);
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
{
BdrvDirtyBitmap *bitmap;
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
if (!bdrv_dirty_bitmap_enabled(bitmap)) {
continue;
}
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
}
/**
* Advance an HBitmapIter to an arbitrary offset.
*/
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
assert(hbi->hb);
hbitmap_iter_init(hbi, hbi->hb, offset);
}
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
return hbitmap_count(bitmap->bitmap);
}

View File

@@ -44,12 +44,6 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -621,20 +615,6 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
bool enabled;
int ret;
enabled = bs->io_limits_enabled;
bs->io_limits_enabled = false;
ret = bdrv_read(bs, sector_num, buf, nb_sectors);
bs->io_limits_enabled = enabled;
return ret;
}
/* Return < 0 if error. Important errors are:
-EIO generic I/O error (may happen for all errors)
-ENOMEDIUM No media inserted.
@@ -939,7 +919,7 @@ out:
/*
* Handle a read request in coroutine context
*/
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1284,7 +1264,7 @@ fail:
/*
* Handle a write request in coroutine context
*/
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1445,26 +1425,6 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
BDRV_REQ_ZERO_WRITE | flags);
}
int bdrv_flush_all(void)
{
BlockDriverState *bs = NULL;
int result = 0;
while ((bs = bdrv_next(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
int ret;
aio_context_acquire(aio_context);
ret = bdrv_flush(bs);
if (ret < 0 && !result) {
result = ret;
}
aio_context_release(aio_context);
}
return result;
}
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;

View File

@@ -39,6 +39,7 @@
#include "sysemu/sysemu.h"
#include "qmp-commands.h"
#include "qapi/qmp/qstring.h"
#include "crypto/secret.h"
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
@@ -1080,6 +1081,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
const char *secretid;
char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
@@ -1099,8 +1102,20 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
return;
}
secretid = qemu_opt_get(opts, "password-secret");
password = qemu_opt_get(opts, "password");
if (!password) {
if (secretid && password) {
error_setg(errp, "'password' and 'password-secret' properties are "
"mutually exclusive");
return;
}
if (secretid) {
secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
if (!secret) {
return;
}
password = secret;
} else if (!password) {
error_setg(errp, "CHAP username specified but no password was given");
return;
}
@@ -1108,6 +1123,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
error_setg(errp, "Failed to set initiator username and password");
}
g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
@@ -1857,6 +1874,11 @@ static QemuOptsList qemu_iscsi_opts = {
.name = "password",
.type = QEMU_OPT_STRING,
.help = "password for CHAP authentication to target",
},{
.name = "password-secret",
.type = QEMU_OPT_STRING,
.help = "ID of the secret providing password for CHAP "
"authentication to target",
},{
.name = "header-digest",
.type = QEMU_OPT_STRING,

View File

@@ -47,7 +47,6 @@ typedef struct MirrorBlockJob {
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
int64_t sector_num;
int64_t granularity;
size_t buf_size;
int64_t bdev_length;
@@ -64,6 +63,8 @@ typedef struct MirrorBlockJob {
int ret;
bool unmap;
bool waiting_for_io;
int target_cluster_sectors;
int max_iov;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -159,115 +160,84 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_write_complete, op);
}
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
* return the offset of the adjusted tail sector against original. */
static int mirror_cow_align(MirrorBlockJob *s,
int64_t *sector_num,
int *nb_sectors)
{
bool need_cow;
int ret = 0;
int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
int64_t align_sector_num = *sector_num;
int align_nb_sectors = *nb_sectors;
int max_sectors = chunk_sectors * s->max_iov;
need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
s->cow_bitmap);
if (need_cow) {
bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
&align_sector_num, &align_nb_sectors);
}
if (align_nb_sectors > max_sectors) {
align_nb_sectors = max_sectors;
if (need_cow) {
align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors,
s->target_cluster_sectors);
}
}
ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors);
*sector_num = align_sector_num;
*nb_sectors = align_nb_sectors;
assert(ret >= 0);
return ret;
}
static inline void mirror_wait_for_io(MirrorBlockJob *s)
{
assert(!s->waiting_for_io);
s->waiting_for_io = true;
qemu_coroutine_yield();
s->waiting_for_io = false;
}
/* Submit async read while handling COW.
* Returns: nb_sectors if no alignment is necessary, or
* (new_end - sector_num) if tail is rounded up or down due to
* alignment or buffer limit.
*/
static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
int nb_sectors)
{
BlockDriverState *source = s->common.bs;
int nb_sectors, sectors_per_chunk, nb_chunks, max_iov;
int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
uint64_t delay_ns = 0;
int sectors_per_chunk, nb_chunks;
int ret = nb_sectors;
MirrorOp *op;
int pnum;
int64_t ret;
BlockDriverState *file;
max_iov = MIN(source->bl.max_iov, s->target->bl.max_iov);
s->sector_num = hbitmap_iter_next(&s->hbi);
if (s->sector_num < 0) {
bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
s->sector_num = hbitmap_iter_next(&s->hbi);
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
assert(s->sector_num >= 0);
}
hbitmap_next_sector = s->sector_num;
sector_num = s->sector_num;
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
end = s->bdev_length / BDRV_SECTOR_SIZE;
/* Extend the QEMUIOVector to include all adjacent blocks that will
* be copied in this operation.
*
* We have to do this if we have no backing file yet in the destination,
* and the cluster size is very large. Then we need to do COW ourselves.
* The first time a cluster is copied, copy it entirely. Note that,
* because both the granularity and the cluster size are powers of two,
* the number of sectors to copy cannot exceed one cluster.
*
* We also want to extend the QEMUIOVector to include more adjacent
* dirty blocks if possible, to limit the number of I/O operations and
* run efficiently even with a small granularity.
*/
nb_chunks = 0;
nb_sectors = 0;
next_sector = sector_num;
next_chunk = sector_num / sectors_per_chunk;
/* We can only handle as much as buf_size at a time. */
nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
assert(nb_sectors);
/* Wait for I/O to this cluster (from a previous iteration) to be done. */
while (test_bit(next_chunk, s->in_flight_bitmap)) {
trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
s->waiting_for_io = true;
qemu_coroutine_yield();
s->waiting_for_io = false;
if (s->cow_bitmap) {
ret += mirror_cow_align(s, &sector_num, &nb_sectors);
}
assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
/* The sector range must meet granularity because:
* 1) Caller passes in aligned values;
* 2) mirror_cow_align is used only when target cluster is larger. */
assert(!(nb_sectors % sectors_per_chunk));
assert(!(sector_num % sectors_per_chunk));
nb_chunks = nb_sectors / sectors_per_chunk;
do {
int added_sectors, added_chunks;
if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
test_bit(next_chunk, s->in_flight_bitmap)) {
assert(nb_sectors > 0);
break;
}
added_sectors = sectors_per_chunk;
if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
bdrv_round_to_clusters(s->target,
next_sector, added_sectors,
&next_sector, &added_sectors);
/* On the first iteration, the rounding may make us copy
* sectors before the first dirty one.
*/
if (next_sector < sector_num) {
assert(nb_sectors == 0);
sector_num = next_sector;
next_chunk = next_sector / sectors_per_chunk;
}
}
added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
/* When doing COW, it may happen that there is not enough space for
* a full cluster. Wait if that is the case.
*/
while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
s->waiting_for_io = true;
qemu_coroutine_yield();
s->waiting_for_io = false;
}
if (s->buf_free_count < nb_chunks + added_chunks) {
trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
break;
}
if (max_iov < nb_chunks + added_chunks) {
trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
break;
}
/* We have enough free space to copy these sectors. */
bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
nb_sectors += added_sectors;
nb_chunks += added_chunks;
next_sector += added_sectors;
next_chunk += added_chunks;
if (!s->synced && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
}
} while (delay_ns == 0 && next_sector < end);
while (s->buf_free_count < nb_chunks) {
trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
mirror_wait_for_io(s);
}
/* Allocate a MirrorOp that is used as an AIO callback. */
op = g_new(MirrorOp, 1);
@@ -279,47 +249,151 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* from s->buf_free.
*/
qemu_iovec_init(&op->qiov, nb_chunks);
next_sector = sector_num;
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
/* Advance the HBitmapIter in parallel, so that we do not examine
* the same sector twice.
*/
if (next_sector > hbitmap_next_sector
&& bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
}
next_sector += sectors_per_chunk;
}
bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
/* Copy the dirty cluster. */
s->in_flight++;
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
ret = bdrv_get_block_status_above(source, NULL, sector_num,
nb_sectors, &pnum, &file);
if (ret < 0 || pnum < nb_sectors ||
(ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
mirror_read_complete, op);
} else if (ret & BDRV_BLOCK_ZERO) {
bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
mirror_read_complete, op);
return ret;
}
static void mirror_do_zero_or_discard(MirrorBlockJob *s,
int64_t sector_num,
int nb_sectors,
bool is_discard)
{
MirrorOp *op;
/* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
* so the freeing in mirror_iteration_done is nop. */
op = g_new0(MirrorOp, 1);
op->s = s;
op->sector_num = sector_num;
op->nb_sectors = nb_sectors;
s->in_flight++;
s->sectors_in_flight += nb_sectors;
if (is_discard) {
bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
mirror_write_complete, op);
} else {
bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
mirror_write_complete, op);
} else {
assert(!(ret & BDRV_BLOCK_DATA));
bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
mirror_write_complete, op);
}
}
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->common.bs;
int64_t sector_num;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
sector_num = hbitmap_iter_next(&s->hbi);
if (sector_num < 0) {
bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
sector_num = hbitmap_iter_next(&s->hbi);
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
assert(sector_num >= 0);
}
/* Find the number of consective dirty chunks following the first dirty
* one, and wait for in flight requests in them. */
while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
int64_t hbitmap_next;
int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
int64_t next_chunk = next_sector / sectors_per_chunk;
if (next_sector >= end ||
!bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
break;
}
if (test_bit(next_chunk, s->in_flight_bitmap)) {
if (nb_chunks > 0) {
break;
}
trace_mirror_yield_in_flight(s, next_sector, s->in_flight);
mirror_wait_for_io(s);
/* Now retry. */
} else {
hbitmap_next = hbitmap_iter_next(&s->hbi);
assert(hbitmap_next == next_sector);
nb_chunks++;
}
}
/* Clear dirty bits before querying the block status, because
* calling bdrv_get_block_status_above could yield - if some blocks are
* marked dirty in this window, we need to know.
*/
bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
nb_chunks * sectors_per_chunk);
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int ret;
int io_sectors;
BlockDriverState *file;
enum MirrorMethod {
MIRROR_METHOD_COPY,
MIRROR_METHOD_ZERO,
MIRROR_METHOD_DISCARD
} mirror_method = MIRROR_METHOD_COPY;
assert(!(sector_num % sectors_per_chunk));
ret = bdrv_get_block_status_above(source, NULL, sector_num,
nb_chunks * sectors_per_chunk,
&io_sectors, &file);
if (ret < 0) {
io_sectors = nb_chunks * sectors_per_chunk;
}
io_sectors -= io_sectors % sectors_per_chunk;
if (io_sectors < sectors_per_chunk) {
io_sectors = sectors_per_chunk;
} else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
int64_t target_sector_num;
int target_nb_sectors;
bdrv_round_to_clusters(s->target, sector_num, io_sectors,
&target_sector_num, &target_nb_sectors);
if (target_sector_num == sector_num &&
target_nb_sectors == io_sectors) {
mirror_method = ret & BDRV_BLOCK_ZERO ?
MIRROR_METHOD_ZERO :
MIRROR_METHOD_DISCARD;
}
}
switch (mirror_method) {
case MIRROR_METHOD_COPY:
io_sectors = mirror_do_read(s, sector_num, io_sectors);
break;
case MIRROR_METHOD_ZERO:
mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
break;
case MIRROR_METHOD_DISCARD:
mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
break;
default:
abort();
}
assert(io_sectors);
sector_num += io_sectors;
nb_chunks -= io_sectors / sectors_per_chunk;
delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
}
return delay_ns;
}
@@ -344,9 +418,7 @@ static void mirror_free_init(MirrorBlockJob *s)
static void mirror_drain(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
s->waiting_for_io = true;
qemu_coroutine_yield();
s->waiting_for_io = false;
mirror_wait_for_io(s);
}
}
@@ -420,6 +492,7 @@ static void coroutine_fn mirror_run(void *opaque)
checking for a NULL string */
int ret = 0;
int n;
int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
goto immediate_exit;
@@ -449,16 +522,16 @@ static void coroutine_fn mirror_run(void *opaque)
*/
bdrv_get_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
if (backing_filename[0] && !s->target->backing) {
ret = bdrv_get_info(s->target, &bdi);
if (ret < 0) {
goto immediate_exit;
}
if (s->granularity < bdi.cluster_size) {
s->buf_size = MAX(s->buf_size, bdi.cluster_size);
s->cow_bitmap = bitmap_new(length);
}
if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
target_cluster_size = bdi.cluster_size;
}
if (backing_filename[0] && !s->target->backing
&& s->granularity < target_cluster_size) {
s->buf_size = MAX(s->buf_size, target_cluster_size);
s->cow_bitmap = bitmap_new(length);
}
s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -533,9 +606,7 @@ static void coroutine_fn mirror_run(void *opaque)
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
s->waiting_for_io = true;
qemu_coroutine_yield();
s->waiting_for_io = false;
mirror_wait_for_io(s);
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);

View File

@@ -204,18 +204,20 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
saddr = g_new0(SocketAddress, 1);
if (qdict_haskey(options, "path")) {
UnixSocketAddress *q_unix;
saddr->type = SOCKET_ADDRESS_KIND_UNIX;
saddr->u.q_unix = g_new0(UnixSocketAddress, 1);
saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path"));
q_unix = saddr->u.q_unix = g_new0(UnixSocketAddress, 1);
q_unix->path = g_strdup(qdict_get_str(options, "path"));
qdict_del(options, "path");
} else {
InetSocketAddress *inet;
saddr->type = SOCKET_ADDRESS_KIND_INET;
saddr->u.inet = g_new0(InetSocketAddress, 1);
saddr->u.inet->host = g_strdup(qdict_get_str(options, "host"));
inet = saddr->u.inet = g_new0(InetSocketAddress, 1);
inet->host = g_strdup(qdict_get_str(options, "host"));
if (!qdict_get_try_str(options, "port")) {
saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
} else {
saddr->u.inet->port = g_strdup(qdict_get_str(options, "port"));
inet->port = g_strdup(qdict_get_str(options, "port"));
}
qdict_del(options, "host");
qdict_del(options, "port");

View File

@@ -36,6 +36,7 @@
#include <nfsc/libnfs.h>
#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
#define QEMU_NFS_MAX_DEBUG_LEVEL 2
typedef struct NFSClient {
struct nfs_context *context;
@@ -333,6 +334,17 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
val = QEMU_NFS_MAX_READAHEAD_SIZE;
}
nfs_set_readahead(client->context, val);
#endif
#ifdef LIBNFS_FEATURE_DEBUG
} else if (!strcmp(qp->p[i].name, "debug")) {
/* limit the maximum debug level to avoid potential flooding
* of our log files. */
if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
error_report("NFS Warning: Limiting NFS debug level"
" to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
val = QEMU_NFS_MAX_DEBUG_LEVEL;
}
nfs_set_debug(client->context, val);
#endif
} else {
error_setg(errp, "Unknown NFS parameter name: %s",

View File

@@ -30,6 +30,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/bitmap.h"
#include "qapi/util.h"
@@ -461,7 +462,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size, cl_size;
uint8_t tmp[BDRV_SECTOR_SIZE];
Error *local_err = NULL;
BlockDriverState *file;
BlockBackend *file;
uint32_t bat_entries, bat_sectors;
ParallelsHeader header;
int ret;
@@ -477,14 +478,17 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
return ret;
}
file = NULL;
ret = bdrv_open(&file, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
if (ret < 0) {
file = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (file == NULL) {
error_propagate(errp, local_err);
return ret;
return -EIO;
}
ret = bdrv_truncate(file, 0);
blk_set_allow_write_beyond_eof(file, true);
ret = blk_truncate(file, 0);
if (ret < 0) {
goto exit;
}
@@ -508,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
memset(tmp, 0, sizeof(tmp));
memcpy(tmp, &header, sizeof(header));
ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
if (ret < 0) {
goto exit;
}
ret = 0;
done:
bdrv_unref(file);
blk_unref(file);
return ret;
exit:

View File

@@ -355,100 +355,116 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
qapi_free_BlockInfo(info);
}
static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
bool query_backing)
static BlockStats *bdrv_query_stats(BlockBackend *blk,
const BlockDriverState *bs,
bool query_backing);
static void bdrv_query_blk_stats(BlockStats *s, BlockBackend *blk)
{
BlockStats *s;
BlockAcctStats *stats = blk_get_stats(blk);
BlockAcctTimedStats *ts = NULL;
s = g_malloc0(sizeof(*s));
s->has_device = true;
s->device = g_strdup(blk_name(blk));
if (bdrv_get_device_name(bs)[0]) {
s->has_device = true;
s->device = g_strdup(bdrv_get_device_name(bs));
s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
s->stats->invalid_flush_operations =
stats->invalid_ops[BLOCK_ACCT_FLUSH];
s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
if (s->stats->has_idle_time_ns) {
s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
}
s->stats->account_invalid = stats->account_invalid;
s->stats->account_failed = stats->account_failed;
while ((ts = block_acct_interval_next(stats, ts))) {
BlockDeviceTimedStatsList *timed_stats =
g_malloc0(sizeof(*timed_stats));
BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
timed_stats->next = s->stats->timed_stats;
timed_stats->value = dev_stats;
s->stats->timed_stats = timed_stats;
TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
dev_stats->interval_length = ts->interval_length;
dev_stats->min_rd_latency_ns = timed_average_min(rd);
dev_stats->max_rd_latency_ns = timed_average_max(rd);
dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
dev_stats->min_wr_latency_ns = timed_average_min(wr);
dev_stats->max_wr_latency_ns = timed_average_max(wr);
dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
dev_stats->min_flush_latency_ns = timed_average_min(fl);
dev_stats->max_flush_latency_ns = timed_average_max(fl);
dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
dev_stats->avg_rd_queue_depth =
block_acct_queue_depth(ts, BLOCK_ACCT_READ);
dev_stats->avg_wr_queue_depth =
block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
}
}
static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
bool query_backing)
{
if (bdrv_get_node_name(bs)[0]) {
s->has_node_name = true;
s->node_name = g_strdup(bdrv_get_node_name(bs));
}
s->stats = g_malloc0(sizeof(*s->stats));
if (bs->blk) {
BlockAcctStats *stats = blk_get_stats(bs->blk);
BlockAcctTimedStats *ts = NULL;
s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
s->stats->invalid_flush_operations =
stats->invalid_ops[BLOCK_ACCT_FLUSH];
s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
if (s->stats->has_idle_time_ns) {
s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
}
s->stats->account_invalid = stats->account_invalid;
s->stats->account_failed = stats->account_failed;
while ((ts = block_acct_interval_next(stats, ts))) {
BlockDeviceTimedStatsList *timed_stats =
g_malloc0(sizeof(*timed_stats));
BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
timed_stats->next = s->stats->timed_stats;
timed_stats->value = dev_stats;
s->stats->timed_stats = timed_stats;
TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
dev_stats->interval_length = ts->interval_length;
dev_stats->min_rd_latency_ns = timed_average_min(rd);
dev_stats->max_rd_latency_ns = timed_average_max(rd);
dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
dev_stats->min_wr_latency_ns = timed_average_min(wr);
dev_stats->max_wr_latency_ns = timed_average_max(wr);
dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
dev_stats->min_flush_latency_ns = timed_average_min(fl);
dev_stats->max_flush_latency_ns = timed_average_max(fl);
dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
dev_stats->avg_rd_queue_depth =
block_acct_queue_depth(ts, BLOCK_ACCT_READ);
dev_stats->avg_wr_queue_depth =
block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
}
}
s->stats->wr_highest_offset = bs->wr_highest_offset;
if (bs->file) {
s->has_parent = true;
s->parent = bdrv_query_stats(bs->file->bs, query_backing);
s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
}
if (query_backing && bs->backing) {
s->has_backing = true;
s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
}
}
static BlockStats *bdrv_query_stats(BlockBackend *blk,
const BlockDriverState *bs,
bool query_backing)
{
BlockStats *s;
s = g_malloc0(sizeof(*s));
s->stats = g_malloc0(sizeof(*s->stats));
if (blk) {
bdrv_query_blk_stats(s, blk);
}
if (bs) {
bdrv_query_bds_stats(s, bs, query_backing);
}
return s;
@@ -477,22 +493,38 @@ BlockInfoList *qmp_query_block(Error **errp)
return head;
}
static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
bool query_nodes)
{
if (query_nodes) {
*bs = bdrv_next_node(*bs);
return !!*bs;
}
*blk = blk_next(*blk);
*bs = *blk ? blk_bs(*blk) : NULL;
return !!*blk;
}
BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
bool query_nodes,
Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
/* Just to be safe if query_nodes is not always initialized */
query_nodes = has_query_nodes && query_nodes;
while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
while (next_query_bds(&blk, &bs, query_nodes)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
AioContext *ctx = bdrv_get_aio_context(bs);
AioContext *ctx = blk ? blk_get_aio_context(blk)
: bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
info->value = bdrv_query_stats(bs, !query_nodes);
info->value = bdrv_query_stats(blk, bs, !query_nodes);
aio_context_release(ctx);
*p_next = info;

View File

@@ -24,6 +24,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "qapi/qmp/qerror.h"
@@ -120,11 +121,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.version != QCOW_VERSION) {
char version[64];
snprintf(version, sizeof(version), "QCOW version %" PRIu32,
header.version);
error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bdrv_get_device_or_node_name(bs), "qcow", version);
error_setg(errp, "Unsupported qcow version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -780,7 +777,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
int flags = 0;
Error *local_err = NULL;
int ret;
BlockDriverState *qcow_bs;
BlockBackend *qcow_blk;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -796,15 +793,18 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
goto cleanup;
}
qcow_bs = NULL;
ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
if (ret < 0) {
qcow_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (qcow_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto cleanup;
}
ret = bdrv_truncate(qcow_bs, 0);
blk_set_allow_write_beyond_eof(qcow_blk, true);
ret = blk_truncate(qcow_blk, 0);
if (ret < 0) {
goto exit;
}
@@ -844,13 +844,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* write all the data */
ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
if (ret != sizeof(header)) {
goto exit;
}
if (backing_file) {
ret = bdrv_pwrite(qcow_bs, sizeof(header),
ret = blk_pwrite(qcow_blk, sizeof(header),
backing_file, backing_filename_len);
if (ret != backing_filename_len) {
goto exit;
@@ -860,7 +860,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
tmp = g_malloc0(BDRV_SECTOR_SIZE);
for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
BDRV_SECTOR_SIZE); i++) {
ret = bdrv_pwrite(qcow_bs, header_size +
ret = blk_pwrite(qcow_blk, header_size +
BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
if (ret != BDRV_SECTOR_SIZE) {
g_free(tmp);
@@ -871,7 +871,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
g_free(tmp);
ret = 0;
exit:
bdrv_unref(qcow_bs);
blk_unref(qcow_blk);
cleanup:
g_free(backing_file);
return ret;

View File

@@ -24,6 +24,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "block/qcow2.h"
@@ -197,22 +198,8 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs)
}
}
static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
Error **errp, const char *fmt, ...)
{
char msg[64];
va_list ap;
va_start(ap, fmt);
vsnprintf(msg, sizeof(msg), fmt, ap);
va_end(ap);
error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bdrv_get_device_or_node_name(bs), "qcow2", msg);
}
static void report_unsupported_feature(BlockDriverState *bs,
Error **errp, Qcow2Feature *table, uint64_t mask)
static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
uint64_t mask)
{
char *features = g_strdup("");
char *old;
@@ -237,7 +224,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
g_free(old);
}
report_unsupported(bs, errp, "%s", features);
error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
g_free(features);
}
@@ -854,7 +841,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.version < 2 || header.version > 3) {
report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version);
error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -934,7 +921,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
void *feature_table = NULL;
qcow2_read_extensions(bs, header.header_length, ext_end,
&feature_table, NULL);
report_unsupported_feature(bs, errp, feature_table,
report_unsupported_feature(errp, feature_table,
s->incompatible_features &
~QCOW2_INCOMPAT_MASK);
ret = -ENOTSUP;
@@ -2097,7 +2084,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
* 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
* size for any qcow2 image.
*/
BlockDriverState* bs;
BlockBackend *blk;
QCowHeader *header;
uint64_t* refcount_table;
Error *local_err = NULL;
@@ -2172,14 +2159,16 @@ static int qcow2_create2(const char *filename, int64_t total_size,
return ret;
}
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
&local_err);
if (ret < 0) {
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return ret;
return -EIO;
}
blk_set_allow_write_beyond_eof(blk, true);
/* Write the header */
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
header = g_malloc0(cluster_size);
@@ -2207,7 +2196,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
}
ret = bdrv_pwrite(bs, 0, header, cluster_size);
ret = blk_pwrite(blk, 0, header, cluster_size);
g_free(header);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2217,7 +2206,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);
if (ret < 0) {
@@ -2225,8 +2214,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
bdrv_unref(bs);
bs = NULL;
blk_unref(blk);
blk = NULL;
/*
* And now open the image and make it consistent first (i.e. increase the
@@ -2235,15 +2224,16 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
ret = bdrv_open(&bs, filename, NULL, options,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
&local_err);
if (ret < 0) {
blk = blk_new_open(filename, NULL, options,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto out;
}
ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
@@ -2255,14 +2245,14 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
/* Create a full header (including things like feature table) */
ret = qcow2_update_header(bs);
ret = qcow2_update_header(blk_bs(blk));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not update qcow2 header");
goto out;
}
/* Okay, now that we have a valid image, let's give it the right size */
ret = bdrv_truncate(bs, total_size);
ret = blk_truncate(blk, total_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not resize image");
goto out;
@@ -2270,7 +2260,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Want a backing file? There you go.*/
if (backing_file) {
ret = bdrv_change_backing_file(bs, backing_file, backing_format);
ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
"with format '%s'", backing_file, backing_format);
@@ -2280,9 +2270,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* And if we're supposed to preallocate metadata, do that now */
if (prealloc != PREALLOC_MODE_OFF) {
BDRVQcow2State *s = bs->opaque;
BDRVQcow2State *s = blk_bs(blk)->opaque;
qemu_co_mutex_lock(&s->lock);
ret = preallocate(bs);
ret = preallocate(blk_bs(blk));
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not preallocate metadata");
@@ -2290,24 +2280,25 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
}
bdrv_unref(bs);
bs = NULL;
blk_unref(blk);
blk = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
ret = bdrv_open(&bs, filename, NULL, options,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
&local_err);
if (local_err) {
blk = blk_new_open(filename, NULL, options,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto out;
}
ret = 0;
out:
if (bs) {
bdrv_unref(bs);
if (blk) {
blk_unref(blk);
}
return ret;
}

View File

@@ -18,6 +18,7 @@
#include "qed.h"
#include "qapi/qmp/qerror.h"
#include "migration/migration.h"
#include "sysemu/block-backend.h"
static const AIOCBInfo qed_aiocb_info = {
.aiocb_size = sizeof(QEDAIOCB),
@@ -376,18 +377,6 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
static void bdrv_qed_drain(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
/* Cancel timer and start doing I/O that were meant to happen as if it
* fired, that way we get bdrv_drain() taking care of the ongoing requests
* correctly. */
qed_cancel_need_check_timer(s);
qed_plug_allocating_write_reqs(s);
bdrv_aio_flush(s->bs, qed_clear_need_check, s);
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -411,11 +400,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
if (s->header.features & ~QED_FEATURE_MASK) {
/* image uses unsupported feature bits */
char buf[64];
snprintf(buf, sizeof(buf), "%" PRIx64,
s->header.features & ~QED_FEATURE_MASK);
error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bdrv_get_device_or_node_name(bs), "QED", buf);
error_setg(errp, "Unsupported QED features: %" PRIx64,
s->header.features & ~QED_FEATURE_MASK);
return -ENOTSUP;
}
if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -580,7 +566,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
size_t l1_size = header.cluster_size * header.table_size;
Error *local_err = NULL;
int ret = 0;
BlockDriverState *bs;
BlockBackend *blk;
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
@@ -588,17 +574,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
return ret;
}
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (ret < 0) {
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return ret;
return -EIO;
}
blk_set_allow_write_beyond_eof(blk, true);
/* File must start empty and grow, check truncate is supported */
ret = bdrv_truncate(bs, 0);
ret = blk_truncate(blk, 0);
if (ret < 0) {
goto out;
}
@@ -614,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
qed_header_cpu_to_le(&header, &le_header);
ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
if (ret < 0) {
goto out;
}
ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
header.backing_filename_size);
ret = blk_pwrite(blk, sizeof(le_header), backing_file,
header.backing_filename_size);
if (ret < 0) {
goto out;
}
l1_table = g_malloc0(l1_size);
ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
if (ret < 0) {
goto out;
}
@@ -633,7 +620,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
bdrv_unref(bs);
blk_unref(blk);
return ret;
}
@@ -1692,7 +1679,6 @@ static BlockDriver bdrv_qed = {
.bdrv_check = bdrv_qed_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
.bdrv_drain = bdrv_qed_drain,
};
static void bdrv_qed_init(void)

View File

@@ -215,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
return acb;
}
static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
int nb_sectors, char *node_name, int ret)
{
const char *msg = NULL;
if (ret < 0) {
msg = strerror(-ret);
}
qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
acb->sector_num, acb->nb_sectors, &error_abort);
qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
sector_num, nb_sectors, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
@@ -284,6 +286,15 @@ static void quorum_aio_cb(void *opaque, int ret)
BDRVQuorumState *s = acb->common.bs->opaque;
bool rewrite = false;
if (ret == 0) {
acb->success_count++;
} else {
QuorumOpType type;
type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
sacb->aiocb->bs->node_name, ret);
}
if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
/* We try to read next child in FIFO order if we fail to read */
if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
@@ -302,11 +313,6 @@ static void quorum_aio_cb(void *opaque, int ret)
sacb->ret = ret;
acb->count++;
if (ret == 0) {
acb->success_count++;
} else {
quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
}
assert(acb->count <= s->num_children);
assert(acb->success_count <= s->num_children);
if (acb->count < s->num_children) {
@@ -338,7 +344,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
acb->nb_sectors,
s->children[item->index]->bs->node_name, 0);
}
}
}
@@ -648,8 +656,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
}
for (i = 0; i < s->num_children; i++) {
bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov,
acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
&acb->qcrs[i].qiov, acb->nb_sectors,
quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
@@ -664,9 +673,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
&acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
quorum_aio_cb, &acb->qcrs[acb->child_iter]);
acb->qcrs[acb->child_iter].aiocb =
bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
&acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
quorum_aio_cb, &acb->qcrs[acb->child_iter]);
return &acb->common;
}
@@ -760,19 +770,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
QuorumVoteValue result_value;
int i;
int result = 0;
int success_count = 0;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
result = bdrv_co_flush(s->children[i]->bs);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
if (result) {
quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
bdrv_nb_sectors(s->children[i]->bs),
s->children[i]->bs->node_name, result);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
} else {
success_count++;
}
}
winner = quorum_get_vote_winner(&error_votes);
result = winner->value.l;
if (success_count >= s->threshold) {
result = 0;
} else {
winner = quorum_get_vote_winner(&error_votes);
result = winner->value.l;
}
quorum_free_vote_list(&error_votes);
return result;

View File

@@ -16,6 +16,7 @@
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
#include "crypto/secret.h"
#include <rbd/librbd.h>
@@ -228,6 +229,27 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
return NULL;
}
static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
Error **errp)
{
if (secretid == 0) {
return 0;
}
gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
errp);
if (!secret) {
return -1;
}
rados_conf_set(cluster, "key", secret);
g_free(secret);
return 0;
}
static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
bool only_read_conf_file,
Error **errp)
@@ -299,10 +321,13 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
const char *secretid;
rados_t cluster;
rados_ioctx_t io_ctx;
int ret;
secretid = qemu_opt_get(opts, "password-secret");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
name, sizeof(name),
@@ -350,6 +375,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
return -EIO;
}
if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
rados_shutdown(cluster);
return -EIO;
}
if (rados_connect(cluster) < 0) {
error_setg(errp, "error connecting");
rados_shutdown(cluster);
@@ -423,6 +453,11 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
.help = "ID of secret providing the password",
},
{ /* end of list */ }
},
};
@@ -436,6 +471,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
const char *secretid;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -450,6 +486,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
filename = qemu_opt_get(opts, "filename");
secretid = qemu_opt_get(opts, "password-secret");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
@@ -488,6 +525,11 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
}
if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
r = -EIO;
goto failed_shutdown;
}
/*
* Fallback to more conservative semantics if setting cache
* options fails. Ignore errors from setting rbd_cache because the
@@ -919,6 +961,11 @@ static QemuOptsList qemu_rbd_create_opts = {
.type = QEMU_OPT_SIZE,
.help = "RBD object size"
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
.help = "ID of secret providing the password",
},
{ /* end of list */ }
}
};

View File

@@ -18,6 +18,7 @@
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/bitops.h"
#define SD_PROTO_VER 0x01
@@ -284,6 +285,12 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
return !!inode->snap_ctime;
}
static inline size_t count_data_objs(const struct SheepdogInode *inode)
{
return DIV_ROUND_UP(inode->vdi_size,
(1UL << inode->block_size_shift));
}
#undef DPRINTF
#ifdef DEBUG_SDOG
#define DPRINTF(fmt, args...) \
@@ -609,14 +616,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
ret = -socket_error();
return ret;
return -errno;
}
ret = qemu_co_send(sockfd, data, *wlen);
if (ret != *wlen) {
ret = -socket_error();
error_report("failed to send a req, %s", strerror(errno));
return -errno;
}
return ret;
@@ -1631,7 +1637,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
static int sd_prealloc(const char *filename, Error **errp)
{
BlockDriverState *bs = NULL;
BlockBackend *blk = NULL;
BDRVSheepdogState *base = NULL;
unsigned long buf_size;
uint32_t idx, max_idx;
@@ -1640,19 +1646,23 @@ static int sd_prealloc(const char *filename, Error **errp)
void *buf = NULL;
int ret;
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
errp);
if (ret < 0) {
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
errp);
if (blk == NULL) {
ret = -EIO;
goto out_with_err_set;
}
vdi_size = bdrv_getlength(bs);
blk_set_allow_write_beyond_eof(blk, true);
vdi_size = blk_getlength(blk);
if (vdi_size < 0) {
ret = vdi_size;
goto out;
}
base = bs->opaque;
base = blk_bs(blk)->opaque;
object_size = (UINT32_C(1) << base->inode.block_size_shift);
buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
buf = g_malloc0(buf_size);
@@ -1664,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp)
* The created image can be a cloned image, so we need to read
* a data from the source image.
*/
ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
ret = blk_pread(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
}
ret = 0;
out:
if (ret < 0) {
error_setg_errno(errp, -ret, "Can't pre-allocate");
}
out_with_err_set:
if (bs) {
bdrv_unref(bs);
if (blk) {
blk_unref(blk);
}
g_free(buf);
@@ -1820,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
}
if (backing_file) {
BlockDriverState *bs;
BlockBackend *blk;
BDRVSheepdogState *base;
BlockDriver *drv;
@@ -1832,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts,
goto out;
}
bs = NULL;
ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp);
if (ret < 0) {
blk = blk_new_open(backing_file, NULL, NULL,
BDRV_O_PROTOCOL | BDRV_O_CACHE_WB, errp);
if (blk == NULL) {
ret = -EIO;
goto out;
}
base = bs->opaque;
base = blk_bs(blk)->opaque;
if (!is_snapshot(&base->inode)) {
error_setg(errp, "cannot clone from a non snapshot vdi");
bdrv_unref(bs);
blk_unref(blk);
ret = -EINVAL;
goto out;
}
s->inode.vdi_id = base->inode.vdi_id;
bdrv_unref(bs);
blk_unref(blk);
}
s->aio_context = qemu_get_aio_context();
@@ -2478,13 +2490,131 @@ out:
return ret;
}
#define NR_BATCHED_DISCARD 128
static bool remove_objects(BDRVSheepdogState *s)
{
int fd, i = 0, nr_objs = 0;
Error *local_err = NULL;
int ret = 0;
bool result = true;
SheepdogInode *inode = &s->inode;
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
error_report_err(local_err);
return false;
}
nr_objs = count_data_objs(inode);
while (i < nr_objs) {
int start_idx, nr_filled_idx;
while (i < nr_objs && !inode->data_vdi_id[i]) {
i++;
}
start_idx = i;
nr_filled_idx = 0;
while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
if (inode->data_vdi_id[i]) {
inode->data_vdi_id[i] = 0;
nr_filled_idx++;
}
i++;
}
ret = write_object(fd, s->aio_context,
(char *)&inode->data_vdi_id[start_idx],
vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
(i - start_idx) * sizeof(uint32_t),
offsetof(struct SheepdogInode,
data_vdi_id[start_idx]),
false, s->cache_flags);
if (ret < 0) {
error_report("failed to discard snapshot inode.");
result = false;
goto out;
}
}
out:
closesocket(fd);
return result;
}
static int sd_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
{
/* FIXME: Delete specified snapshot id. */
return 0;
unsigned long snap_id = 0;
char snap_tag[SD_MAX_VDI_TAG_LEN];
Error *local_err = NULL;
int fd, ret;
char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
BDRVSheepdogState *s = bs->opaque;
unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
uint32_t vid;
SheepdogVdiReq hdr = {
.opcode = SD_OP_DEL_VDI,
.data_length = wlen,
.flags = SD_FLAG_CMD_WRITE,
};
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
if (!remove_objects(s)) {
return -1;
}
memset(buf, 0, sizeof(buf));
memset(snap_tag, 0, sizeof(snap_tag));
pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
if (ret || snap_id > UINT32_MAX) {
error_setg(errp, "Invalid snapshot ID: %s",
snapshot_id ? snapshot_id : "<null>");
return -EINVAL;
}
if (snap_id) {
hdr.snapid = (uint32_t) snap_id;
} else {
pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
}
ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
&local_err);
if (ret) {
return ret;
}
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
error_report_err(local_err);
return -1;
}
ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
buf, &wlen, &rlen);
closesocket(fd);
if (ret) {
return ret;
}
switch (rsp->result) {
case SD_RES_NO_VDI:
error_report("%s was already deleted", s->name);
case SD_RES_SUCCESS:
break;
default:
error_report("%s, %s", sd_strerror(rsp->result), s->name);
return -1;
}
return ret;
}
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)

View File

@@ -52,6 +52,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
#include "qemu/coroutine.h"
@@ -733,7 +734,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
size_t bmap_size;
int64_t offset = 0;
Error *local_err = NULL;
BlockDriverState *bs = NULL;
BlockBackend *blk = NULL;
uint32_t *bmap = NULL;
logout("\n");
@@ -766,13 +767,18 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
error_propagate(errp, local_err);
goto exit;
}
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
&local_err);
if (ret < 0) {
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto exit;
}
blk_set_allow_write_beyond_eof(blk, true);
/* We need enough blocks to store the given disk size,
so always round up. */
blocks = DIV_ROUND_UP(bytes, block_size);
@@ -802,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
ret = blk_pwrite(blk, offset, &header, sizeof(header));
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
@@ -823,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
ret = blk_pwrite(blk, offset, bmap, bmap_size);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
@@ -832,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
if (image_type == VDI_TYPE_STATIC) {
ret = bdrv_truncate(bs, offset + blocks * block_size);
ret = blk_truncate(blk, offset + blocks * block_size);
if (ret < 0) {
error_setg(errp, "Failed to statically allocate %s", filename);
goto exit;
@@ -840,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
exit:
bdrv_unref(bs);
blk_unref(blk);
g_free(bmap);
return ret;
}

View File

@@ -18,6 +18,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
#include "block/vhdx.h"
@@ -264,10 +265,10 @@ static void vhdx_region_unregister_all(BDRVVHDXState *s)
static void vhdx_set_shift_bits(BDRVVHDXState *s)
{
s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
s->block_size_bits = 31 - clz32(s->block_size);
s->logical_sector_size_bits = ctz32(s->logical_sector_size);
s->sectors_per_block_bits = ctz32(s->sectors_per_block);
s->chunk_ratio_bits = ctz64(s->chunk_ratio);
s->block_size_bits = ctz32(s->block_size);
}
/*
@@ -857,14 +858,8 @@ static void vhdx_calc_bat_entries(BDRVVHDXState *s)
{
uint32_t data_blocks_cnt, bitmap_blocks_cnt;
data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
data_blocks_cnt++;
}
bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
bitmap_blocks_cnt++;
}
data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size);
bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio);
if (s->parent_entries) {
s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
@@ -1778,7 +1773,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
gunichar2 *creator = NULL;
glong creator_items;
BlockDriverState *bs;
BlockBackend *blk;
char *type = NULL;
VHDXImageType image_type;
Error *local_err = NULL;
@@ -1843,14 +1838,17 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
&local_err);
if (ret < 0) {
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto exit;
}
blk_set_allow_write_beyond_eof(blk, true);
/* Create (A) */
/* The creator field is optional, but may be useful for
@@ -1858,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
&creator_items, NULL);
signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
if (ret < 0) {
goto delete_and_exit;
}
if (creator) {
ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
creator, creator_items * sizeof(gunichar2));
ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
creator, creator_items * sizeof(gunichar2));
if (ret < 0) {
goto delete_and_exit;
}
@@ -1872,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
/* Creates (B),(C) */
ret = vhdx_create_new_headers(bs, image_size, log_size);
ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
if (ret < 0) {
goto delete_and_exit;
}
/* Creates (D),(E),(G) explicitly. (F) created as by-product */
ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
log_size, use_zero_blocks, image_type,
&metadata_offset);
if (ret < 0) {
@@ -1886,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* Creates (H) */
ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
metadata_offset, image_type);
if (ret < 0) {
goto delete_and_exit;
@@ -1894,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
delete_and_exit:
bdrv_unref(bs);
blk_unref(blk);
exit:
g_free(type);
g_free(creator);

View File

@@ -26,6 +26,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
@@ -242,15 +243,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
char desc[DESC_SIZE];
char *desc;
uint32_t cid = 0xffffffff;
const char *p_name, *cid_str;
size_t cid_str_size;
BDRVVmdkState *s = bs->opaque;
int ret;
desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
g_free(desc);
return 0;
}
@@ -269,41 +272,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
sscanf(p_name, "%" SCNx32, &cid);
}
g_free(desc);
return cid;
}
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
char *desc, *tmp_desc;
char *p_name, *tmp_str;
BDRVVmdkState *s = bs->opaque;
int ret;
int ret = 0;
desc = g_malloc0(DESC_SIZE);
tmp_desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
return ret;
goto out;
}
desc[DESC_SIZE - 1] = '\0';
tmp_str = strstr(desc, "parentCID");
if (tmp_str == NULL) {
return -EINVAL;
ret = -EINVAL;
goto out;
}
pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
p_name = strstr(desc, "CID");
if (p_name != NULL) {
p_name += sizeof("CID");
snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
pstrcat(desc, sizeof(desc), tmp_desc);
snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
pstrcat(desc, DESC_SIZE, tmp_desc);
}
ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
return ret;
}
return 0;
out:
g_free(desc);
g_free(tmp_desc);
return ret;
}
static int vmdk_is_cid_valid(BlockDriverState *bs)
@@ -337,15 +344,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state,
static int vmdk_parent_open(BlockDriverState *bs)
{
char *p_name;
char desc[DESC_SIZE + 1];
char *desc;
BDRVVmdkState *s = bs->opaque;
int ret;
desc[DESC_SIZE] = '\0';
desc = g_malloc0(DESC_SIZE + 1);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
return ret;
goto out;
}
ret = 0;
p_name = strstr(desc, "parentFileNameHint");
if (p_name != NULL) {
@@ -354,16 +362,20 @@ static int vmdk_parent_open(BlockDriverState *bs)
p_name += sizeof("parentFileNameHint") + 1;
end_name = strchr(p_name, '\"');
if (end_name == NULL) {
return -EINVAL;
ret = -EINVAL;
goto out;
}
if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
return -EINVAL;
ret = -EINVAL;
goto out;
}
pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
}
return 0;
out:
g_free(desc);
return ret;
}
/* Create and append extent to the extent array. Return the added VmdkExtent
@@ -649,11 +661,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
compressed =
le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
if (le32_to_cpu(header.version) > 3) {
char buf[64];
snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
le32_to_cpu(header.version));
error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bdrv_get_device_or_node_name(bs), "vmdk", buf);
error_setg(errp, "Unsupported VMDK version %" PRIu32,
le32_to_cpu(header.version));
return -ENOTSUP;
} else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
!compressed) {
@@ -1639,7 +1648,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
QemuOpts *opts, Error **errp)
{
int ret, i;
BlockDriverState *bs = NULL;
BlockBackend *blk = NULL;
VMDK4Header header;
Error *local_err = NULL;
uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
@@ -1652,16 +1661,19 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
goto exit;
}
assert(bs == NULL);
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
&local_err);
if (ret < 0) {
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto exit;
}
blk_set_allow_write_beyond_eof(blk, true);
if (flat) {
ret = bdrv_truncate(bs, filesize);
ret = blk_truncate(blk, filesize);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
@@ -1716,18 +1728,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.check_bytes[3] = 0xa;
/* write all the data */
ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
goto exit;
@@ -1740,8 +1752,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
gd_buf, gd_buf_size);
ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1752,8 +1764,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
gd_buf, gd_buf_size);
ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1761,8 +1773,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
ret = 0;
exit:
if (bs) {
bdrv_unref(bs);
if (blk) {
blk_unref(blk);
}
g_free(gd_buf);
return ret;
@@ -1811,7 +1823,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
{
int idx = 0;
BlockDriverState *new_bs = NULL;
BlockBackend *new_blk = NULL;
Error *local_err = NULL;
char *desc = NULL;
int64_t total_size = 0, filesize;
@@ -1922,7 +1934,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
if (backing_file) {
BlockDriverState *bs = NULL;
BlockBackend *blk;
char *full_backing = g_new0(char, PATH_MAX);
bdrv_get_full_backing_filename_from_filename(filename, backing_file,
full_backing, PATH_MAX,
@@ -1933,18 +1945,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
ret = -ENOENT;
goto exit;
}
ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp);
blk = blk_new_open(full_backing, NULL, NULL,
BDRV_O_NO_BACKING | BDRV_O_CACHE_WB, errp);
g_free(full_backing);
if (ret != 0) {
if (blk == NULL) {
ret = -EIO;
goto exit;
}
if (strcmp(bs->drv->format_name, "vmdk")) {
bdrv_unref(bs);
if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
blk_unref(blk);
ret = -EINVAL;
goto exit;
}
parent_cid = vmdk_read_cid(bs, 0);
bdrv_unref(bs);
parent_cid = vmdk_read_cid(blk_bs(blk), 0);
blk_unref(blk);
snprintf(parent_desc_line, BUF_SIZE,
"parentFileNameHint=\"%s\"", backing_file);
}
@@ -2002,14 +2017,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
}
assert(new_bs == NULL);
ret = bdrv_open(&new_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
if (ret < 0) {
new_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (new_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto exit;
}
ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
blk_set_allow_write_beyond_eof(new_blk, true);
ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write description");
goto exit;
@@ -2017,14 +2037,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
* for description file */
if (desc_offset == 0) {
ret = bdrv_truncate(new_bs, desc_len);
ret = blk_truncate(new_blk, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
}
exit:
if (new_bs) {
bdrv_unref(new_bs);
if (new_blk) {
blk_unref(new_blk);
}
g_free(adapter_type);
g_free(backing_file);

View File

@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
#if defined(CONFIG_UUID)
@@ -46,8 +47,14 @@ enum vhd_type {
// Seconds since Jan 1, 2000 0:00:00 (UTC)
#define VHD_TIMESTAMP_BASE 946684800
#define VHD_CHS_MAX_C 65535LL
#define VHD_CHS_MAX_H 16
#define VHD_CHS_MAX_S 255
#define VHD_MAX_SECTORS (65535LL * 255 * 255)
#define VHD_MAX_GEOMETRY (65535LL * 16 * 255)
#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
#define VPC_OPT_FORCE_SIZE "force_size"
// always big-endian
typedef struct vhd_footer {
@@ -128,6 +135,8 @@ typedef struct BDRVVPCState {
uint32_t block_size;
uint32_t bitmap_size;
bool force_use_chs;
bool force_use_sz;
#ifdef CACHE
uint8_t *pageentry_u8;
@@ -140,6 +149,22 @@ typedef struct BDRVVPCState {
Error *migration_blocker;
} BDRVVPCState;
#define VPC_OPT_SIZE_CALC "force_size_calc"
static QemuOptsList vpc_runtime_opts = {
.name = "vpc-runtime-opts",
.head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
.desc = {
{
.name = VPC_OPT_SIZE_CALC,
.type = QEMU_OPT_STRING,
.help = "Force disk size calculation to use either CHS geometry, "
"or use the disk current_size specified in the VHD footer. "
"{chs, current_size}"
},
{ /* end of list */ }
}
};
static uint32_t vpc_checksum(uint8_t* buf, size_t size)
{
uint32_t res = 0;
@@ -159,6 +184,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
Error **errp)
{
BDRVVPCState *s = bs->opaque;
const char *size_calc;
size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
if (!size_calc) {
/* no override, use autodetect only */
} else if (!strcmp(size_calc, "current_size")) {
s->force_use_sz = true;
} else if (!strcmp(size_calc, "chs")) {
s->force_use_chs = true;
} else {
error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
}
}
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -166,6 +210,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int i;
VHDFooter *footer;
VHDDynDiskHeader *dyndisk_header;
QemuOpts *opts = NULL;
Error *local_err = NULL;
bool use_chs;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
@@ -173,6 +220,21 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
vpc_parse_options(bs, opts, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -218,12 +280,36 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
/* Images that have exactly the maximum geometry are probably bigger and
* would be truncated if we adhered to the geometry for them. Rely on
* footer->current_size for them. */
if (bs->total_sectors == VHD_MAX_GEOMETRY) {
/* Microsoft Virtual PC and Microsoft Hyper-V produce and read
* VHD image sizes differently. VPC will rely on CHS geometry,
* while Hyper-V and disk2vhd use the size specified in the footer.
*
* We use a couple of approaches to try and determine the correct method:
* look at the Creator App field, and look for images that have CHS
* geometry that is the maximum value.
*
* If the CHS geometry is the maximum CHS geometry, then we assume that
* the size is the footer->current_size to avoid truncation. Otherwise,
* we follow the table based on footer->creator_app:
*
* Known creator apps:
* 'vpc ' : CHS Virtual PC (uses disk geometry)
* 'qemu' : CHS QEMU (uses disk geometry)
* 'qem2' : current_size QEMU (uses current_size)
* 'win ' : current_size Hyper-V
* 'd2v ' : current_size Disk2vhd
*
* The user can override the table values via drive options, however
* even with an override we will still use current_size for images
* that have CHS geometry of the maximum size.
*/
use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
!!strncmp(footer->creator_app, "qem2", 4) &&
!!strncmp(footer->creator_app, "d2v ", 4)) || s->force_use_chs;
if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
bs->total_sectors = be64_to_cpu(footer->current_size) /
BDRV_SECTOR_SIZE;
BDRV_SECTOR_SIZE;
}
/* Allow a maximum disk size of approximately 2 TB */
@@ -673,7 +759,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
@@ -687,13 +773,13 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
if (ret) {
goto fail;
}
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -703,7 +789,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
ret = bdrv_pwrite_sync(bs, offset, buf, 512);
ret = blk_pwrite(blk, offset, buf, 512);
if (ret < 0) {
goto fail;
}
@@ -730,7 +816,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
// Write the header
offset = 512;
ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
ret = blk_pwrite(blk, offset, buf, 1024);
if (ret < 0) {
goto fail;
}
@@ -739,7 +825,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
return ret;
}
static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_size)
{
int ret;
@@ -747,12 +833,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
/* Add footer to total size */
total_size += HEADER_SIZE;
ret = bdrv_truncate(bs, total_size);
ret = blk_truncate(blk, total_size);
if (ret < 0) {
return ret;
}
ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
if (ret < 0) {
return ret;
}
@@ -773,8 +859,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size;
int disk_type;
int ret = -EIO;
bool force_size;
Error *local_err = NULL;
BlockDriverState *bs = NULL;
BlockBackend *blk = NULL;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -793,30 +880,44 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
disk_type = VHD_DYNAMIC;
}
force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
}
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
&local_err);
if (ret < 0) {
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
goto out;
}
blk_set_allow_write_beyond_eof(blk, true);
/*
* Calculate matching total_size and geometry. Increase the number of
* sectors requested until we get enough (or fail). This ensures that
* qemu-img convert doesn't truncate images, but rather rounds up.
*
* If the image size can't be represented by a spec conform CHS geometry,
* If the image size can't be represented by a spec conformant CHS geometry,
* we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
* the image size from the VHD footer to calculate total_sectors.
*/
total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
if (force_size) {
/* This will force the use of total_size for sector count, below */
cyls = VHD_CHS_MAX_C;
heads = VHD_CHS_MAX_H;
secs_per_cyl = VHD_CHS_MAX_S;
} else {
total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
}
}
if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
@@ -835,8 +936,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
memset(buf, 0, 1024);
memcpy(footer->creator, "conectix", 8);
/* TODO Check if "qemu" creator_app is ok for VPC */
memcpy(footer->creator_app, "qemu", 4);
if (force_size) {
memcpy(footer->creator_app, "qem2", 4);
} else {
memcpy(footer->creator_app, "qemu", 4);
}
memcpy(footer->creator_os, "Wi2k", 4);
footer->features = cpu_to_be32(0x02);
@@ -866,13 +970,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
ret = create_dynamic_disk(bs, buf, total_sectors);
ret = create_dynamic_disk(blk, buf, total_sectors);
} else {
ret = create_fixed_disk(bs, buf, total_size);
ret = create_fixed_disk(blk, buf, total_size);
}
out:
bdrv_unref(bs);
blk_unref(blk);
g_free(disk_type_param);
return ret;
}
@@ -917,6 +1021,13 @@ static QemuOptsList vpc_create_opts = {
"Type of virtual hard disk format. Supported formats are "
"{dynamic (default) | fixed} "
},
{
.name = VPC_OPT_FORCE_SIZE,
.type = QEMU_OPT_BOOL,
.help = "Force disk size calculation to use the actual size "
"specified, rather than using the nearest CHS-based "
"calculation"
},
{ /* end of list */ }
}
};

View File

@@ -147,6 +147,7 @@ void blockdev_auto_del(BlockBackend *blk)
DriveInfo *dinfo = blk_legacy_dinfo(blk);
if (dinfo && dinfo->auto_del) {
monitor_remove_blk(blk);
blk_unref(blk);
}
}
@@ -561,7 +562,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
if ((!file || !*file) && !qdict_size(bs_opts)) {
BlockBackendRootState *blk_rs;
blk = blk_new(qemu_opts_id(opts), errp);
blk = blk_new(errp);
if (!blk) {
goto early_err;
}
@@ -593,19 +594,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
if (snapshot) {
/* always use cache=unsafe with snapshot */
qdict_put(bs_opts, BDRV_OPT_CACHE_WB, qstring_from_str("on"));
qdict_put(bs_opts, BDRV_OPT_CACHE_DIRECT, qstring_from_str("off"));
qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on"));
}
if (runstate_check(RUN_STATE_INMIGRATE)) {
bdrv_flags |= BDRV_O_INACTIVE;
}
blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
errp);
blk = blk_new_open(file, NULL, bs_opts, bdrv_flags, errp);
if (!blk) {
goto err_no_bs_opts;
}
@@ -637,6 +630,12 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
blk_set_on_error(blk, on_read_error, on_write_error);
if (!monitor_add_blk(blk, qemu_opts_id(opts), errp)) {
blk_unref(blk);
blk = NULL;
goto err_no_bs_opts;
}
err_no_bs_opts:
qemu_opts_del(opts);
QDECREF(interval_dict);
@@ -682,6 +681,13 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
goto fail;
}
/* bdrv_open() defaults to the values in bdrv_flags (for compatibility
* with other callers) rather than what we want as the real defaults.
* Apply the defaults here instead. */
qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_WB, "on");
qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
if (runstate_check(RUN_STATE_INMIGRATE)) {
bdrv_flags |= BDRV_O_INACTIVE;
}
@@ -717,6 +723,13 @@ void blockdev_close_all_bdrv_states(void)
}
}
/* Iterates over the list of monitor-owned BlockDriverStates */
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
{
return bs ? QTAILQ_NEXT(bs, monitor_list)
: QTAILQ_FIRST(&monitor_bdrv_states);
}
static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
Error **errp)
{
@@ -1173,7 +1186,7 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
int ret;
if (!strcmp(device, "all")) {
ret = bdrv_commit_all();
ret = blk_commit_all();
} else {
BlockDriverState *bs;
AioContext *aio_context;
@@ -1202,15 +1215,11 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
}
}
static void blockdev_do_action(TransactionActionKind type, void *data,
Error **errp)
static void blockdev_do_action(TransactionAction *action, Error **errp)
{
TransactionAction action;
TransactionActionList list;
action.type = type;
action.u.data = data;
list.value = &action;
list.value = action;
list.next = NULL;
qmp_transaction(&list, false, NULL, errp);
}
@@ -1236,8 +1245,11 @@ void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
.has_mode = has_mode,
.mode = mode,
};
blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
&snapshot, errp);
TransactionAction action = {
.type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
.u.blockdev_snapshot_sync = &snapshot,
};
blockdev_do_action(&action, errp);
}
void qmp_blockdev_snapshot(const char *node, const char *overlay,
@@ -1247,9 +1259,11 @@ void qmp_blockdev_snapshot(const char *node, const char *overlay,
.node = (char *) node,
.overlay = (char *) overlay
};
blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
&snapshot_data, errp);
TransactionAction action = {
.type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
.u.blockdev_snapshot = &snapshot_data,
};
blockdev_do_action(&action, errp);
}
void qmp_blockdev_snapshot_internal_sync(const char *device,
@@ -1260,9 +1274,11 @@ void qmp_blockdev_snapshot_internal_sync(const char *device,
.device = (char *) device,
.name = (char *) name
};
blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
&snapshot, errp);
TransactionAction action = {
.type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
.u.blockdev_snapshot_internal_sync = &snapshot,
};
blockdev_do_action(&action, errp);
}
SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
@@ -1729,10 +1745,15 @@ static void external_snapshot_prepare(BlkActionState *common,
/* create new image w/backing file */
mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
if (mode != NEW_IMAGE_MODE_EXISTING) {
int64_t size = bdrv_getlength(state->old_bs);
if (size < 0) {
error_setg_errno(errp, -size, "bdrv_getlength failed");
return;
}
bdrv_img_create(new_image_file, format,
state->old_bs->filename,
state->old_bs->drv->format_name,
NULL, -1, flags, &local_err, false);
NULL, size, flags, &local_err, false);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -2405,11 +2426,6 @@ void qmp_x_blockdev_remove_medium(const char *device, Error **errp)
goto out;
}
/* This follows the convention established by bdrv_make_anon() */
if (bs->device_list.tqe_prev) {
bdrv_device_remove(bs);
}
blk_remove_bs(blk);
if (!blk_dev_has_tray(blk)) {
@@ -2457,8 +2473,6 @@ static void qmp_blockdev_insert_anon_medium(const char *device,
blk_insert_bs(blk, bs);
QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
if (!blk_dev_has_tray(blk)) {
/* For tray-less devices, blockdev-close-tray is a no-op (or may not be
* called at all); therefore, the medium needs to be pushed into the
@@ -2816,6 +2830,15 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
AioContext *aio_context;
Error *local_err = NULL;
bs = bdrv_find_node(id);
if (bs) {
qmp_x_blockdev_del(false, NULL, true, id, &local_err);
if (local_err) {
error_report_err(local_err);
}
return;
}
blk = blk_by_name(id);
if (!blk) {
error_report("Device '%s' not found", id);
@@ -2842,13 +2865,16 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
blk_remove_bs(blk);
}
/* if we have a device attached to this BlockDriverState
* then we need to make the drive anonymous until the device
* can be removed. If this is a drive with no device backing
* then we can just get rid of the block driver state right here.
/* Make the BlockBackend and the attached BlockDriverState anonymous */
monitor_remove_blk(blk);
if (blk_bs(blk)) {
bdrv_make_anon(blk_bs(blk));
}
/* If this BlockBackend has a device attached to it, its refcount will be
* decremented when the device is removed; otherwise we have to do so here.
*/
if (blk_get_attached_dev(blk)) {
blk_hide_on_behalf_of_hmp_drive_del(blk);
/* Further I/O must not pause the guest */
blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT,
BLOCKDEV_ON_ERROR_REPORT);
@@ -3867,6 +3893,37 @@ out:
aio_context_release(aio_context);
}
void hmp_drive_add_node(Monitor *mon, const char *optstr)
{
QemuOpts *opts;
QDict *qdict;
Error *local_err = NULL;
opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false);
if (!opts) {
return;
}
qdict = qemu_opts_to_qdict(opts, NULL);
if (!qdict_get_try_str(qdict, "node-name")) {
QDECREF(qdict);
error_report("'node-name' needs to be specified");
goto out;
}
BlockDriverState *bs = bds_tree_init(qdict, &local_err);
if (!bs) {
error_report_err(local_err);
goto out;
}
QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
out:
qemu_opts_del(opts);
}
void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
{
QmpOutputVisitor *ov = qmp_output_visitor_new();
@@ -3928,6 +3985,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
if (bs && bdrv_key_required(bs)) {
if (blk) {
monitor_remove_blk(blk);
blk_unref(blk);
} else {
QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
@@ -3957,6 +4015,7 @@ void qmp_x_blockdev_del(bool has_id, const char *id,
}
if (has_id) {
/* blk_by_name() never returns a BB that is not owned by the monitor */
blk = blk_by_name(id);
if (!blk) {
error_setg(errp, "Cannot find block backend %s", id);
@@ -4004,6 +4063,7 @@ void qmp_x_blockdev_del(bool has_id, const char *id,
}
if (blk) {
monitor_remove_blk(blk);
blk_unref(blk);
} else {
QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
@@ -4174,7 +4234,7 @@ QemuOptsList qemu_common_drive_opts = {
static QemuOptsList qemu_root_bds_opts = {
.name = "root-bds",
.head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
.head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head),
.desc = {
{
.name = "discard",

21
configure vendored
View File

@@ -280,6 +280,7 @@ libusb=""
usb_redir=""
opengl=""
opengl_dmabuf="no"
avx2_opt="no"
zlib="yes"
lzo=""
snappy=""
@@ -1773,6 +1774,21 @@ EOF
fi
##########################################
# avx2 optimization requirement check
cat > $TMPC << EOF
static void bar(void) {}
static void *bar_ifunc(void) {return (void*) bar;}
static void foo(void) __attribute__((ifunc("bar_ifunc")));
int main(void) { foo(); return 0; }
EOF
if compile_prog "-mavx2" "" ; then
if readelf --syms $TMPE |grep "IFUNC.*foo" >/dev/null 2>&1; then
avx2_opt="yes"
fi
fi
#########################################
# zlib check
if test "$zlib" != "no" ; then
@@ -4790,6 +4806,7 @@ echo "bzip2 support $bzip2"
echo "NUMA host support $numa"
echo "tcmalloc support $tcmalloc"
echo "jemalloc support $jemalloc"
echo "avx2 optimization $avx2_opt"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5178,6 +5195,10 @@ if test "$opengl" = "yes" ; then
fi
fi
if test "$avx2_opt" = "yes" ; then
echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
fi
if test "$lzo" = "yes" ; then
echo "CONFIG_LZO=y" >> $config_host_mak
fi

70
cpus.c
View File

@@ -29,6 +29,7 @@
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
@@ -370,9 +371,12 @@ static void icount_warp_rt(void)
}
}
static void icount_dummy_timer(void *opaque)
static void icount_timer_cb(void *opaque)
{
(void)opaque;
/* No need for a checkpoint because the timer already synchronizes
* with CHECKPOINT_CLOCK_VIRTUAL_RT.
*/
icount_warp_rt();
}
void qtest_clock_warp(int64_t dest)
@@ -396,17 +400,12 @@ void qtest_clock_warp(int64_t dest)
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
void qemu_clock_warp(QEMUClockType type)
void qemu_start_warp_timer(void)
{
int64_t clock;
int64_t deadline;
/*
* There are too many global variables to make the "warp" behavior
* applicable to other clocks. But a clock argument removes the
* need for if statements all over the place.
*/
if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
if (!use_icount) {
return;
}
@@ -418,29 +417,17 @@ void qemu_clock_warp(QEMUClockType type)
}
/* warp clock deterministically in record/replay mode */
if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP)) {
if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
return;
}
if (icount_sleep) {
/*
* If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
* This ensures that the deadline for the timer is computed correctly
* below.
* This also makes sure that the insn counter is synchronized before
* the CPU starts running, in case the CPU is woken by an event other
* than the earliest QEMU_CLOCK_VIRTUAL timer.
*/
icount_warp_rt();
timer_del(icount_warp_timer);
}
if (!all_cpu_threads_idle()) {
return;
}
if (qtest_enabled()) {
/* When testing, qtest commands advance icount. */
return;
return;
}
/* We want to use the earliest deadline from ALL vm_clocks */
@@ -496,6 +483,28 @@ void qemu_clock_warp(QEMUClockType type)
}
}
static void qemu_account_warp_timer(void)
{
if (!use_icount || !icount_sleep) {
return;
}
/* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
* do not fire, so computing the deadline does not make sense.
*/
if (!runstate_is_running()) {
return;
}
/* warp clock deterministically in record/replay mode */
if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
return;
}
timer_del(icount_warp_timer);
icount_warp_rt();
}
static bool icount_state_needed(void *opaque)
{
return use_icount;
@@ -624,13 +633,13 @@ void configure_icount(QemuOpts *opts, Error **errp)
icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
if (icount_sleep) {
icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
icount_dummy_timer, NULL);
icount_timer_cb, NULL);
}
icount_align_option = qemu_opt_get_bool(opts, "align", false);
if (icount_align_option && !icount_sleep) {
error_setg(errp, "align=on and sleep=no are incompatible");
error_setg(errp, "align=on and sleep=off are incompatible");
}
if (strcmp(option, "auto") != 0) {
errno = 0;
@@ -643,7 +652,7 @@ void configure_icount(QemuOpts *opts, Error **errp)
} else if (icount_align_option) {
error_setg(errp, "shift=auto and align=on are incompatible");
} else if (!icount_sleep) {
error_setg(errp, "shift=auto and sleep=no are incompatible");
error_setg(errp, "shift=auto and sleep=off are incompatible");
}
use_icount = 2;
@@ -726,7 +735,7 @@ static int do_vm_stop(RunState state)
}
bdrv_drain_all();
ret = bdrv_flush_all();
ret = blk_flush_all();
return ret;
}
@@ -995,9 +1004,6 @@ static void qemu_wait_io_event_common(CPUState *cpu)
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
while (all_cpu_threads_idle()) {
/* Start accounting real time to the virtual clock if the CPUs
are idle. */
qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
@@ -1428,7 +1434,7 @@ int vm_stop_force_state(RunState state)
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
return bdrv_flush_all();
return blk_flush_all();
}
}
@@ -1499,7 +1505,7 @@ static void tcg_exec_all(void)
int r;
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
qemu_account_warp_timer();
if (next_cpu == NULL) {
next_cpu = first_cpu;

View File

@@ -416,8 +416,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
&& cpu_physical_memory_is_clean(section->mr->ram_addr
+ xlat)) {
&& cpu_physical_memory_is_clean(
memory_region_get_ram_addr(section->mr) + xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;

View File

@@ -110,3 +110,4 @@ CONFIG_IOH3420=y
CONFIG_I82801B11=y
CONFIG_ACPI=y
CONFIG_SMBIOS=y
CONFIG_ASPEED_SOC=y

View File

@@ -30,6 +30,7 @@
#include "qemu/config-file.h"
#include "sysemu/sysemu.h"
#include "monitor/monitor.h"
#include "block/block_int.h"
static DriveInfo *add_init_drive(const char *optstr)
{
@@ -55,6 +56,12 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
{
DriveInfo *dinfo = NULL;
const char *opts = qdict_get_str(qdict, "opts");
bool node = qdict_get_try_bool(qdict, "node", false);
if (node) {
hmp_drive_add_node(mon, opts);
return;
}
dinfo = add_init_drive(opts);
if (!dinfo) {
@@ -77,6 +84,8 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
err:
if (dinfo) {
blk_unref(blk_by_legacy_dinfo(dinfo));
BlockBackend *blk = blk_by_legacy_dinfo(dinfo);
monitor_remove_blk(blk);
blk_unref(blk);
}
}

View File

@@ -180,8 +180,8 @@ aliases that leave holes then the lower priority region will appear in these
holes too.)
For example, suppose we have a container A of size 0x8000 with two subregions
B and C. B is a container mapped at 0x2000, size 0x4000, priority 1; C is
an MMIO region mapped at 0x0, size 0x6000, priority 2. B currently has two
B and C. B is a container mapped at 0x2000, size 0x4000, priority 2; C is
an MMIO region mapped at 0x0, size 0x6000, priority 1. B currently has two
of its own subregions: D of size 0x1000 at offset 0 and E of size 0x1000 at
offset 0x2000. As a diagram:
@@ -297,8 +297,9 @@ various constraints can be supplied to control how these callbacks are called:
- .valid.min_access_size, .valid.max_access_size define the access sizes
(in bytes) which the device accepts; accesses outside this range will
have device and bus specific behaviour (ignored, or machine check)
- .valid.aligned specifies that the device only accepts naturally aligned
accesses. Unaligned accesses invoke device and bus specific behaviour.
- .valid.unaligned specifies that the *device being modelled* supports
unaligned accesses; if false, unaligned accesses will invoke the
appropriate bus or CPU specific behaviour.
- .impl.min_access_size, .impl.max_access_size define the access sizes
(in bytes) supported by the *implementation*; other access sizes will be
emulated using the ones available. For example a 4-byte write will be
@@ -306,5 +307,5 @@ various constraints can be supplied to control how these callbacks are called:
- .impl.unaligned specifies that the *implementation* supports unaligned
accesses; if false, unaligned accesses will be emulated by two aligned
accesses.
- .old_mmio can be used to ease porting from code using
- .old_mmio eases the porting of code that was formerly using
cpu_register_io_memory(). It should not be used in new code.

View File

@@ -333,7 +333,7 @@ doesn't finish in a given time the switch is made to postcopy.
To enable postcopy, issue this command on the monitor prior to the
start of migration:
migrate_set_capability x-postcopy-ram on
migrate_set_capability postcopy-ram on
The normal commands are then used to start a migration, which is still
started in precopy mode. Issuing:

View File

@@ -24,8 +24,8 @@ A detailed command line would be:
-object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 -numa node,nodeid=0,cpus=0,memdev=ram-node0
-object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 -numa node,nodeid=1,cpus=1,memdev=ram-node1
-device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd -device e1000,bus=bridge1,addr=0x4,netdev=nd
-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8, -device e1000,bus=bridge2,addr=0x3
-device pxb,id=bridge3,bus=pci.0,bus_nr=40, -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1
-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8 -device e1000,bus=bridge2,addr=0x3
-device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1
Here you have:
- 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in host, but you can and should put it in different host NUMA nodes)
@@ -43,7 +43,7 @@ Implementation
==============
The PXB is composed by:
- HostBridge (TYPE_PXB_HOST)
The host bridge allows to register and query the PXB's rPCI root bus in QEMU.
The host bridge allows to register and query the PXB's PCI root bus in QEMU.
- PXBDev(TYPE_PXB_DEVICE)
It is a regular PCI Device that resides on the piix host-bridge bus and its bus uses the same PCI domain.
However, the bus behind is exposed through ACPI as a primary PCI bus and starts a new PCI hierarchy.

View File

@@ -1,7 +1,7 @@
= How to use the QAPI code generator =
Copyright IBM Corp. 2011
Copyright (C) 2012-2015 Red Hat, Inc.
Copyright (C) 2012-2016 Red Hat, Inc.
This work is licensed under the terms of the GNU GPL, version 2 or
later. See the COPYING file in the top-level directory.
@@ -52,7 +52,7 @@ schema. The documentation is delimited between two lines of ##, then
the first line names the expression, an optional overview is provided,
then individual documentation about each member of 'data' is provided,
and finally, a 'Since: x.y.z' tag lists the release that introduced
the expression. Optional fields are tagged with the phrase
the expression. Optional members are tagged with the phrase
'#optional', often with their default value; and extensions added
after the expression was first released are also given a '(since
x.y.z)' comment. For example:
@@ -108,15 +108,15 @@ user-defined type names, while built-in types are lowercase. Type
definitions should not end in 'Kind', as this namespace is used for
creating implicit C enums for visiting union types, or in 'List', as
this namespace is used for creating array types. Command names,
and field names within a type, should be all lower case with words
and member names within a type, should be all lower case with words
separated by a hyphen. However, some existing older commands and
complex types use underscore; when extending such expressions,
consistency is preferred over blindly avoiding underscore. Event
names should be ALL_CAPS with words separated by underscore. Field
names should be ALL_CAPS with words separated by underscore. Member
names cannot start with 'has-' or 'has_', as this is reserved for
tracking optional fields.
tracking optional members.
Any name (command, event, type, field, or enum value) beginning with
Any name (command, event, type, member, or enum value) beginning with
"x-" is marked experimental, and may be withdrawn or changed
incompatibly in a future release. All names must begin with a letter,
and contain only ASCII letters, digits, dash, and underscore. There
@@ -127,7 +127,7 @@ the vendor), even if the rest of the name uses dash (example:
__com.redhat_drive-mirror). Names beginning with 'q_' are reserved
for the generator: QMP names that resemble C keywords or other
problematic strings will be munged in C to use this prefix. For
example, a field named "default" in qapi becomes "q_default" in the
example, a member named "default" in qapi becomes "q_default" in the
generated C code.
In the rest of this document, usage lines are given for each
@@ -217,17 +217,18 @@ and must continue to work).
On output structures (only mentioned in the 'returns' side of a command),
changing from mandatory to optional is in general unsafe (older clients may be
expecting the field, and could crash if it is missing), although it can be done
if the only way that the optional argument will be omitted is when it is
triggered by the presence of a new input flag to the command that older clients
don't know to send. Changing from optional to mandatory is safe.
expecting the member, and could crash if it is missing), although it
can be done if the only way that the optional argument will be omitted
is when it is triggered by the presence of a new input flag to the
command that older clients don't know to send. Changing from optional
to mandatory is safe.
A structure that is used in both input and output of various commands
must consider the backwards compatibility constraints of both directions
of use.
A struct definition can specify another struct as its base.
In this case, the fields of the base type are included as top-level fields
In this case, the members of the base type are included as top-level members
of the new struct's dictionary in the Client JSON Protocol wire
format. An example definition is:
@@ -237,7 +238,7 @@ format. An example definition is:
'data': { '*backing': 'str' } }
An example BlockdevOptionsGenericCOWFormat object on the wire could use
both fields like this:
both members like this:
{ "file": "/some/place/my-image",
"backing": "/some/place/my-backing-file" }
@@ -262,7 +263,7 @@ The enum constants will be named by using a heuristic to turn the
type name into a set of underscore separated words. For the example
above, 'MyEnum' will turn into 'MY_ENUM' giving a constant name
of 'MY_ENUM_VALUE1' for the first value. If the default heuristic
does not result in a desirable name, the optional 'prefix' field
does not result in a desirable name, the optional 'prefix' member
can be used when defining the enum.
The enumeration values are passed as strings over the Client JSON
@@ -275,9 +276,9 @@ converting between strings and enum values. Since the wire format
always passes by name, it is acceptable to reorder or add new
enumeration members in any location without breaking clients of Client
JSON Protocol; however, removing enum values would break
compatibility. For any struct that has a field that will only contain
a finite set of string values, using an enum type for that field is
better than open-coding the field to be type 'str'.
compatibility. For any struct that has a member that will only contain
a finite set of string values, using an enum type for that member is
better than open-coding the member to be type 'str'.
=== Union types ===
@@ -305,8 +306,8 @@ values to data types like in this example:
'qcow2': 'Qcow2Options' } }
In the Client JSON Protocol, a simple union is represented by a
dictionary that contains the 'type' field as a discriminator, and a
'data' field that is of the specified data type corresponding to the
dictionary that contains the 'type' member as a discriminator, and a
'data' member that is of the specified data type corresponding to the
discriminator value, as in these examples:
{ "type": "file", "data" : { "filename": "/some/place/my-image" } }
@@ -321,14 +322,14 @@ enum. The value for each branch can be of any type.
A flat union definition specifies a struct as its base, and
avoids nesting on the wire. All branches of the union must be
complex types, and the top-level fields of the union dictionary on
the wire will be combination of fields from both the base type and the
complex types, and the top-level members of the union dictionary on
the wire will be combination of members from both the base type and the
appropriate branch type (when merging two dictionaries, there must be
no keys in common). The 'discriminator' field must be the name of an
no keys in common). The 'discriminator' member must be the name of an
enum-typed member of the base struct.
The following example enhances the above simple union example by
adding a common field 'readonly', renaming the discriminator to
adding a common member 'readonly', renaming the discriminator to
something more applicable, and reducing the number of {} required on
the wire:
@@ -353,8 +354,8 @@ the user, but because it must map to a base member with enum type, the
code generator can ensure that branches exist for all values of the
enum (although the order of the keys need not match the declaration of
the enum). In the resulting generated C data types, a flat union is
represented as a struct with the base member fields included directly,
and then a union of structures for each branch of the struct.
represented as a struct with the base members included directly, and
then a union of structures for each branch of the struct.
A simple union can always be re-written as a flat union where the base
class has a single member named 'type', and where each branch of the
@@ -424,10 +425,10 @@ string name of a complex type, or a dictionary that declares an
anonymous type with the same semantics as a 'struct' expression, with
one exception noted below when 'gen' is used.
The 'returns' member describes what will appear in the "return" field
The 'returns' member describes what will appear in the "return" member
of a Client JSON Protocol reply on successful completion of a command.
The member is optional from the command declaration; if absent, the
"return" field will be an empty dictionary. If 'returns' is present,
"return" member will be an empty dictionary. If 'returns' is present,
it must be the string name of a complex or built-in type, a
one-element array containing the name of a complex or built-in type,
with one exception noted below when 'gen' is used. Although it is
@@ -435,7 +436,7 @@ permitted to have the 'returns' member name a built-in type or an
array of built-in types, any command that does this cannot be extended
to return additional information in the future; thus, new commands
should strongly consider returning a dictionary-based type or an array
of dictionaries, even if the dictionary only contains one field at the
of dictionaries, even if the dictionary only contains one member at the
present.
All commands in Client JSON Protocol use a dictionary to report
@@ -478,7 +479,7 @@ response is not possible (although the command will still return a
normal dictionary error on failure). When a successful reply is not
possible, the command expression should include the optional key
'success-response' with boolean value false. So far, only QGA makes
use of this field.
use of this member.
=== Events ===
@@ -656,7 +657,7 @@ Union types
{ "name": "BlockdevOptions", "meta-type": "object",
"members": [
{ "name": "kind", "type": "BlockdevOptionsKind" } ],
{ "name": "type", "type": "BlockdevOptionsKind" } ],
"tag": "type",
"variants": [
{ "case": "file", "type": ":obj-FileOptions-wrapper" },
@@ -722,33 +723,38 @@ the names of built-in types. Clients should examine member
== Code generation ==
Schemas are fed into four scripts to generate all the code/files that,
Schemas are fed into five scripts to generate all the code/files that,
paired with the core QAPI libraries, comprise everything required to
take JSON commands read in by a Client JSON Protocol server, unmarshal
the arguments into the underlying C types, call into the corresponding
C function, and map the response back to a Client JSON Protocol
response to be returned to the user.
C function, map the response back to a Client JSON Protocol response
to be returned to the user, and introspect the commands.
As an example, we'll use the following schema, which describes a single
complex user-defined type (which will produce a C struct, along with a list
node structure that can be used to chain together a list of such types in
case we want to accept/return a list of this type with a command), and a
command which takes that type as a parameter and returns the same type:
As an example, we'll use the following schema, which describes a
single complex user-defined type, along with command which takes a
list of that type as a parameter, and returns a single element of that
type. The user is responsible for writing the implementation of
qmp_my_command(); everything else is produced by the generator.
$ cat example-schema.json
{ 'struct': 'UserDefOne',
'data': { 'integer': 'int', 'string': 'str' } }
'data': { 'integer': 'int', '*string': 'str' } }
{ 'command': 'my-command',
'data': {'arg1': 'UserDefOne'},
'data': { 'arg1': ['UserDefOne'] },
'returns': 'UserDefOne' }
{ 'event': 'MY_EVENT' }
For a more thorough look at generated code, the testsuite includes
tests/qapi-schema/qapi-schema-tests.json that covers more examples of
what the generator will accept, and compiles the resulting C code as
part of 'make check-unit'.
=== scripts/qapi-types.py ===
Used to generate the C types defined by a schema. The following files are
created:
Used to generate the C types defined by a schema, along with
supporting code. The following files are created:
$(prefix)qapi-types.h - C types corresponding to types defined in
the schema you pass in
@@ -763,38 +769,6 @@ Example:
$ python scripts/qapi-types.py --output-dir="qapi-generated" \
--prefix="example-" example-schema.json
$ cat qapi-generated/example-qapi-types.c
[Uninteresting stuff omitted...]
void qapi_free_UserDefOne(UserDefOne *obj)
{
QapiDeallocVisitor *qdv;
Visitor *v;
if (!obj) {
return;
}
qdv = qapi_dealloc_visitor_new();
v = qapi_dealloc_get_visitor(qdv);
visit_type_UserDefOne(v, &obj, NULL, NULL);
qapi_dealloc_visitor_cleanup(qdv);
}
void qapi_free_UserDefOneList(UserDefOneList *obj)
{
QapiDeallocVisitor *qdv;
Visitor *v;
if (!obj) {
return;
}
qdv = qapi_dealloc_visitor_new();
v = qapi_dealloc_get_visitor(qdv);
visit_type_UserDefOneList(v, &obj, NULL, NULL);
qapi_dealloc_visitor_cleanup(qdv);
}
$ cat qapi-generated/example-qapi-types.h
[Uninteresting stuff omitted...]
@@ -809,29 +783,59 @@ Example:
struct UserDefOne {
int64_t integer;
bool has_string;
char *string;
};
void qapi_free_UserDefOne(UserDefOne *obj);
struct UserDefOneList {
union {
UserDefOne *value;
uint64_t padding;
};
UserDefOneList *next;
UserDefOne *value;
};
void qapi_free_UserDefOneList(UserDefOneList *obj);
#endif
$ cat qapi-generated/example-qapi-types.c
[Uninteresting stuff omitted...]
void qapi_free_UserDefOne(UserDefOne *obj)
{
QapiDeallocVisitor *qdv;
Visitor *v;
if (!obj) {
return;
}
qdv = qapi_dealloc_visitor_new();
v = qapi_dealloc_get_visitor(qdv);
visit_type_UserDefOne(v, NULL, &obj, NULL);
qapi_dealloc_visitor_cleanup(qdv);
}
void qapi_free_UserDefOneList(UserDefOneList *obj)
{
QapiDeallocVisitor *qdv;
Visitor *v;
if (!obj) {
return;
}
qdv = qapi_dealloc_visitor_new();
v = qapi_dealloc_get_visitor(qdv);
visit_type_UserDefOneList(v, NULL, &obj, NULL);
qapi_dealloc_visitor_cleanup(qdv);
}
=== scripts/qapi-visit.py ===
Used to generate the visitor functions used to walk through and convert
a QObject (as provided by QMP) to a native C data structure and
vice-versa, as well as the visitor function used to dealloc a complex
schema-defined C type.
Used to generate the visitor functions used to walk through and
convert between a native QAPI C data structure and some other format
(such as QObject); the generated functions are named visit_type_FOO()
and visit_type_FOO_members().
The following files are generated:
@@ -848,41 +852,62 @@ Example:
$ python scripts/qapi-visit.py --output-dir="qapi-generated"
--prefix="example-" example-schema.json
$ cat qapi-generated/example-qapi-visit.h
[Uninteresting stuff omitted...]
#ifndef EXAMPLE_QAPI_VISIT_H
#define EXAMPLE_QAPI_VISIT_H
[Visitors for built-in types omitted...]
void visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp);
void visit_type_UserDefOne(Visitor *v, const char *name, UserDefOne **obj, Error **errp);
void visit_type_UserDefOneList(Visitor *v, const char *name, UserDefOneList **obj, Error **errp);
#endif
$ cat qapi-generated/example-qapi-visit.c
[Uninteresting stuff omitted...]
static void visit_type_UserDefOne_fields(Visitor *v, UserDefOne **obj, Error **errp)
void visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp)
{
Error *err = NULL;
visit_type_int(v, &(*obj)->integer, "integer", &err);
visit_type_int(v, "integer", &obj->integer, &err);
if (err) {
goto out;
}
visit_type_str(v, &(*obj)->string, "string", &err);
if (err) {
goto out;
if (visit_optional(v, "string", &obj->has_string)) {
visit_type_str(v, "string", &obj->string, &err);
if (err) {
goto out;
}
}
out:
error_propagate(errp, err);
}
void visit_type_UserDefOne(Visitor *v, UserDefOne **obj, const char *name, Error **errp)
void visit_type_UserDefOne(Visitor *v, const char *name, UserDefOne **obj, Error **errp)
{
Error *err = NULL;
visit_start_struct(v, (void **)obj, "UserDefOne", name, sizeof(UserDefOne), &err);
if (!err) {
if (*obj) {
visit_type_UserDefOne_fields(v, obj, errp);
}
visit_end_struct(v, &err);
visit_start_struct(v, name, (void **)obj, sizeof(UserDefOne), &err);
if (err) {
goto out;
}
if (!*obj) {
goto out_obj;
}
visit_type_UserDefOne_members(v, *obj, &err);
error_propagate(errp, err);
err = NULL;
out_obj:
visit_end_struct(v, &err);
out:
error_propagate(errp, err);
}
void visit_type_UserDefOneList(Visitor *v, UserDefOneList **obj, const char *name, Error **errp)
void visit_type_UserDefOneList(Visitor *v, const char *name, UserDefOneList **obj, Error **errp)
{
Error *err = NULL;
GenericList *i, **prev;
@@ -893,35 +918,24 @@ Example:
}
for (prev = (GenericList **)obj;
!err && (i = visit_next_list(v, prev, &err)) != NULL;
!err && (i = visit_next_list(v, prev, sizeof(**obj))) != NULL;
prev = &i) {
UserDefOneList *native_i = (UserDefOneList *)i;
visit_type_UserDefOne(v, &native_i->value, NULL, &err);
visit_type_UserDefOne(v, NULL, &native_i->value, &err);
}
error_propagate(errp, err);
err = NULL;
visit_end_list(v, &err);
visit_end_list(v);
out:
error_propagate(errp, err);
}
$ cat qapi-generated/example-qapi-visit.h
[Uninteresting stuff omitted...]
#ifndef EXAMPLE_QAPI_VISIT_H
#define EXAMPLE_QAPI_VISIT_H
[Visitors for built-in types omitted...]
void visit_type_UserDefOne(Visitor *v, UserDefOne **obj, const char *name, Error **errp);
void visit_type_UserDefOneList(Visitor *v, UserDefOneList **obj, const char *name, Error **errp);
#endif
=== scripts/qapi-commands.py ===
Used to generate the marshaling/dispatch functions for the commands defined
in the schema. The following files are generated:
Used to generate the marshaling/dispatch functions for the commands
defined in the schema. The generated code implements
qmp_marshal_COMMAND() (mentioned in qmp-commands.hx, and registered
automatically), and declares qmp_COMMAND() that the user must
implement. The following files are generated:
$(prefix)qmp-marshal.c: command marshal/dispatch functions for each
QMP command defined in the schema. Functions
@@ -939,6 +953,19 @@ Example:
$ python scripts/qapi-commands.py --output-dir="qapi-generated"
--prefix="example-" example-schema.json
$ cat qapi-generated/example-qmp-commands.h
[Uninteresting stuff omitted...]
#ifndef EXAMPLE_QMP_COMMANDS_H
#define EXAMPLE_QMP_COMMANDS_H
#include "example-qapi-types.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"
UserDefOne *qmp_my_command(UserDefOneList *arg1, Error **errp);
#endif
$ cat qapi-generated/example-qmp-marshal.c
[Uninteresting stuff omitted...]
@@ -950,7 +977,7 @@ Example:
Visitor *v;
v = qmp_output_get_visitor(qov);
visit_type_UserDefOne(v, &ret_in, "unused", &err);
visit_type_UserDefOne(v, "unused", &ret_in, &err);
if (err) {
goto out;
}
@@ -961,7 +988,7 @@ Example:
qmp_output_visitor_cleanup(qov);
qdv = qapi_dealloc_visitor_new();
v = qapi_dealloc_get_visitor(qdv);
visit_type_UserDefOne(v, &ret_in, "unused", NULL);
visit_type_UserDefOne(v, "unused", &ret_in, NULL);
qapi_dealloc_visitor_cleanup(qdv);
}
@@ -972,10 +999,10 @@ Example:
QmpInputVisitor *qiv = qmp_input_visitor_new_strict(QOBJECT(args));
QapiDeallocVisitor *qdv;
Visitor *v;
UserDefOne *arg1 = NULL;
UserDefOneList *arg1 = NULL;
v = qmp_input_get_visitor(qiv);
visit_type_UserDefOne(v, &arg1, "arg1", &err);
visit_type_UserDefOneList(v, "arg1", &arg1, &err);
if (err) {
goto out;
}
@@ -992,7 +1019,7 @@ Example:
qmp_input_visitor_cleanup(qiv);
qdv = qapi_dealloc_visitor_new();
v = qapi_dealloc_get_visitor(qdv);
visit_type_UserDefOne(v, &arg1, "arg1", NULL);
visit_type_UserDefOneList(v, "arg1", &arg1, NULL);
qapi_dealloc_visitor_cleanup(qdv);
}
@@ -1002,24 +1029,12 @@ Example:
}
qapi_init(qmp_init_marshal);
$ cat qapi-generated/example-qmp-commands.h
[Uninteresting stuff omitted...]
#ifndef EXAMPLE_QMP_COMMANDS_H
#define EXAMPLE_QMP_COMMANDS_H
#include "example-qapi-types.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"
UserDefOne *qmp_my_command(UserDefOne *arg1, Error **errp);
#endif
=== scripts/qapi-event.py ===
Used to generate the event-related C code defined by a schema. The
following files are created:
Used to generate the event-related C code defined by a schema, with
implementations for qapi_event_send_FOO(). The following files are
created:
$(prefix)qapi-event.h - Function prototypes for each event type, plus an
enumeration of all event names
@@ -1029,6 +1044,27 @@ Example:
$ python scripts/qapi-event.py --output-dir="qapi-generated"
--prefix="example-" example-schema.json
$ cat qapi-generated/example-qapi-event.h
[Uninteresting stuff omitted...]
#ifndef EXAMPLE_QAPI_EVENT_H
#define EXAMPLE_QAPI_EVENT_H
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "example-qapi-types.h"
void qapi_event_send_my_event(Error **errp);
typedef enum example_QAPIEvent {
EXAMPLE_QAPI_EVENT_MY_EVENT = 0,
EXAMPLE_QAPI_EVENT__MAX = 1,
} example_QAPIEvent;
extern const char *const example_QAPIEvent_lookup[];
#endif
$ cat qapi-generated/example-qapi-event.c
[Uninteresting stuff omitted...]
@@ -1054,27 +1090,6 @@ Example:
[EXAMPLE_QAPI_EVENT_MY_EVENT] = "MY_EVENT",
[EXAMPLE_QAPI_EVENT__MAX] = NULL,
};
$ cat qapi-generated/example-qapi-event.h
[Uninteresting stuff omitted...]
#ifndef EXAMPLE_QAPI_EVENT_H
#define EXAMPLE_QAPI_EVENT_H
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "example-qapi-types.h"
void qapi_event_send_my_event(Error **errp);
typedef enum example_QAPIEvent {
EXAMPLE_QAPI_EVENT_MY_EVENT = 0,
EXAMPLE_QAPI_EVENT__MAX = 1,
} example_QAPIEvent;
extern const char *const example_QAPIEvent_lookup[];
#endif
=== scripts/qapi-introspect.py ===
@@ -1089,17 +1104,6 @@ Example:
$ python scripts/qapi-introspect.py --output-dir="qapi-generated"
--prefix="example-" example-schema.json
$ cat qapi-generated/example-qmp-introspect.c
[Uninteresting stuff omitted...]
const char example_qmp_schema_json[] = "["
"{\"arg-type\": \"0\", \"meta-type\": \"event\", \"name\": \"MY_EVENT\"}, "
"{\"arg-type\": \"1\", \"meta-type\": \"command\", \"name\": \"my-command\", \"ret-type\": \"2\"}, "
"{\"members\": [], \"meta-type\": \"object\", \"name\": \"0\"}, "
"{\"members\": [{\"name\": \"arg1\", \"type\": \"2\"}], \"meta-type\": \"object\", \"name\": \"1\"}, "
"{\"members\": [{\"name\": \"integer\", \"type\": \"int\"}, {\"name\": \"string\", \"type\": \"str\"}], \"meta-type\": \"object\", \"name\": \"2\"}, "
"{\"json-type\": \"int\", \"meta-type\": \"builtin\", \"name\": \"int\"}, "
"{\"json-type\": \"string\", \"meta-type\": \"builtin\", \"name\": \"str\"}]";
$ cat qapi-generated/example-qmp-introspect.h
[Uninteresting stuff omitted...]
@@ -1109,3 +1113,15 @@ Example:
extern const char example_qmp_schema_json[];
#endif
$ cat qapi-generated/example-qmp-introspect.c
[Uninteresting stuff omitted...]
const char example_qmp_schema_json[] = "["
"{\"arg-type\": \"0\", \"meta-type\": \"event\", \"name\": \"MY_EVENT\"}, "
"{\"arg-type\": \"1\", \"meta-type\": \"command\", \"name\": \"my-command\", \"ret-type\": \"2\"}, "
"{\"members\": [], \"meta-type\": \"object\", \"name\": \"0\"}, "
"{\"members\": [{\"name\": \"arg1\", \"type\": \"[2]\"}], \"meta-type\": \"object\", \"name\": \"1\"}, "
"{\"members\": [{\"name\": \"integer\", \"type\": \"int\"}, {\"default\": null, \"name\": \"string\", \"type\": \"str\"}], \"meta-type\": \"object\", \"name\": \"2\"}, "
"{\"element-type\": \"2\", \"meta-type\": \"array\", \"name\": \"[2]\"}, "
"{\"json-type\": \"int\", \"meta-type\": \"builtin\", \"name\": \"int\"}, "
"{\"json-type\": \"string\", \"meta-type\": \"builtin\", \"name\": \"str\"}]";

View File

@@ -325,6 +325,7 @@ Emitted to report a corruption of a Quorum file.
Data:
- "type": Quorum operation type
- "error": Error message (json-string, optional)
Only present on failure. This field contains a human-readable
error message. There are no semantics other than that the
@@ -336,10 +337,18 @@ Data:
Example:
Read operation:
{ "event": "QUORUM_REPORT_BAD",
"data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 },
"data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
"type": "read" },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
Flush operation:
{ "event": "QUORUM_REPORT_BAD",
"data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
"type": "flush", "error": "Broken pipe" },
"timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
Note: this event is rate-limited.
RESET

View File

@@ -3,7 +3,7 @@
0. About This Document
======================
Copyright (C) 2009-2015 Red Hat, Inc.
Copyright (C) 2009-2016 Red Hat, Inc.
This work is licensed under the terms of the GNU GPL, version 2 or
later. See the COPYING file in the top-level directory.
@@ -277,7 +277,7 @@ However, Clients must not assume any particular:
- Amount of errors generated by a command, that is, new errors can be added
to any existing command in newer versions of the Server
Any command or field name beginning with "x-" is deemed experimental,
Any command or member name beginning with "x-" is deemed experimental,
and may be withdrawn or changed in an incompatible manner in a future
release.

View File

@@ -107,7 +107,7 @@ at the specified moments of time. There are several kinds of timers:
sources (e.g. real time clock chip). Host clock is the one of the sources
of non-determinism. Host clock read operations should be logged to
make the execution deterministic.
* Real time clock for icount. This clock is similar to real time clock but
* Virtual real time clock. This clock is similar to real time clock but
it is used only for increasing virtual clock while virtual machine is
sleeping. Due to its nature it is also non-deterministic as the host clock
and has to be logged too.
@@ -134,11 +134,20 @@ of time. That's why we do not process a group of timers until the checkpoint
event will be read from the log. Such an event allows synchronizing CPU
execution and timer events.
Another checkpoints application in record/replay is instruction counting
while the virtual machine is idle. This function (qemu_clock_warp) is called
from the wait loop. It changes virtual machine state and must be deterministic
then. That is why we added checkpoint to this function to prevent its
operation in replay mode when it does not correspond to record mode.
Two other checkpoints govern the "warping" of the virtual clock.
While the virtual machine is idle, the virtual clock increments at
1 ns per *real time* nanosecond. This is done by setting up a timer
(called the warp timer) on the virtual real time clock, so that the
timer fires at the next deadline of the virtual clock; the virtual clock
is then incremented (which is called "warping" the virtual clock) as
soon as the timer fires or the CPUs need to go out of the idle state.
Two functions are used for this purpose; because these actions change
virtual machine state and must be deterministic, each of them creates a
checkpoint. qemu_start_warp_timer checks if the CPUs are idle and if so
starts accounting real time to virtual clock. qemu_account_warp_timer
is called when the CPUs get an interrupt or when the warp timer fires,
and it warps the virtual clock by the amount of real time that has passed
since qemu_start_warp_timer.
Bottom halves
-------------

View File

@@ -84,6 +84,15 @@ Selector Register address: Base + 8 (2 bytes)
Data Register address: Base + 0 (8 bytes)
DMA Address address: Base + 16 (8 bytes)
== ACPI Interface ==
The fw_cfg device is defined with ACPI ID "QEMU0002". Since we expect
ACPI tables to be passed into the guest through the fw_cfg device itself,
the guest-side firmware can not use ACPI to find fw_cfg. However, once the
firmware is finished setting up ACPI tables and hands control over to the
guest kernel, the latter can use the fw_cfg ACPI node for a more accurate
inventory of in-use IOport or MMIO regions.
== Firmware Configuration Items ==
=== Signature (Key 0x0000, FW_CFG_SIGNATURE) ===

View File

@@ -15,13 +15,23 @@ The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
Note that this allocation separate from the virtio device IDs, which are
maintained as part of the virtio specification.
1af4:1000 network device
1af4:1001 block device
1af4:1002 balloon device
1af4:1003 console device
1af4:1004 SCSI host bus adapter device
1af4:1005 entropy generator device
1af4:1009 9p filesystem device
1af4:1000 network device (legacy)
1af4:1001 block device (legacy)
1af4:1002 balloon device (legacy)
1af4:1003 console device (legacy)
1af4:1004 SCSI host bus adapter device (legacy)
1af4:1005 entropy generator device (legacy)
1af4:1009 9p filesystem device (legacy)
1af4:1041 network device (modern)
1af4:1042 block device (modern)
1af4:1043 console device (modern)
1af4:1044 entropy generator device (modern)
1af4:1045 balloon device (modern)
1af4:1048 SCSI host bus adapter device (modern)
1af4:1049 9p filesystem device (modern)
1af4:1050 virtio gpu device (modern)
1af4:1052 virtio input device (modern)
1af4:10f0 Available for experimental usage without registration. Must get
to official ID when the code leaves the test lab (i.e. when seeking

View File

@@ -172,9 +172,6 @@ source tree. It may not be as powerful as platform-specific or third-party
trace backends but it is portable. This is the recommended trace backend
unless you have specific needs for more advanced backends.
The "simple" backend currently does not capture string arguments, it simply
records the char* pointer value instead of the string that is pointed to.
=== Ftrace ===
The "ftrace" backend writes trace data to ftrace marker. This effectively
@@ -347,3 +344,44 @@ This will immediately call:
and will generate the TCG code to call:
void trace_foo(uint8_t a1, uint32_t a2);
=== "vcpu" ===
Identifies events that trace vCPU-specific information. It implicitly adds a
"CPUState*" argument, and extends the tracing print format to show the vCPU
information. If used together with the "tcg" property, it adds a second
"TCGv_env" argument that must point to the per-target global TCG register that
points to the vCPU when guest code is executed (usually the "cpu_env" variable).
The following example events:
foo(uint32_t a) "a=%x"
vcpu bar(uint32_t a) "a=%x"
tcg vcpu baz(uint32_t a) "a=%x", "a=%x"
Can be used as:
#include "trace-tcg.h"
CPUArchState *env;
TCGv_ptr cpu_env;
void some_disassembly_func(...)
{
/* trace emitted at this point */
trace_foo(0xd1);
/* trace emitted at this point */
trace_bar(ENV_GET_CPU(env), 0xd2);
/* trace emitted at this point (env) and when guest code is executed (cpu_env) */
trace_baz_tcg(ENV_GET_CPU(env), cpu_env, 0xd3);
}
If the translating vCPU has address 0xc1 and code is later executed by vCPU
0xc2, this would be an example output:
// at guest code translation
foo a=0xd1
bar cpu=0xc1 a=0xd2
baz_trans cpu=0xc1 a=0xd3
// at guest code execution
baz_exec cpu=0xc2 a=0xd3

234
exec.c
View File

@@ -135,6 +135,7 @@ typedef struct PhysPageMap {
struct AddressSpaceDispatch {
struct rcu_head rcu;
MemoryRegionSection *mru_section;
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
@@ -307,6 +308,17 @@ static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
}
}
static inline bool section_covers_addr(const MemoryRegionSection *section,
hwaddr addr)
{
/* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
* the section must cover the entire address space.
*/
return section->size.hi ||
range_covers_byte(section->offset_within_address_space,
section->size.lo, addr);
}
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
Node *nodes, MemoryRegionSection *sections)
{
@@ -322,9 +334,7 @@ static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
}
if (sections[lp.ptr].size.hi ||
range_covers_byte(sections[lp.ptr].offset_within_address_space,
sections[lp.ptr].size.lo, addr)) {
if (section_covers_addr(&sections[lp.ptr], addr)) {
return &sections[lp.ptr];
} else {
return &sections[PHYS_SECTION_UNASSIGNED];
@@ -342,14 +352,25 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
hwaddr addr,
bool resolve_subpage)
{
MemoryRegionSection *section;
MemoryRegionSection *section = atomic_read(&d->mru_section);
subpage_t *subpage;
bool update;
section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
section_covers_addr(section, addr)) {
update = false;
} else {
section = phys_page_find(d->phys_map, addr, d->map.nodes,
d->map.sections);
update = true;
}
if (resolve_subpage && section->mr->subpage) {
subpage = container_of(section->mr, subpage_t, iomem);
section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
}
if (update) {
atomic_set(&d->mru_section, section);
}
return section;
}
@@ -1207,92 +1228,83 @@ void qemu_mutex_unlock_ramlist(void)
}
#ifdef __linux__
#include <sys/vfs.h>
#define HUGETLBFS_MAGIC 0x958458f6
static long gethugepagesize(const char *path, Error **errp)
{
struct statfs fs;
int ret;
do {
ret = statfs(path, &fs);
} while (ret != 0 && errno == EINTR);
if (ret != 0) {
error_setg_errno(errp, errno, "failed to get page size of file %s",
path);
return 0;
}
return fs.f_bsize;
}
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path,
Error **errp)
{
struct stat st;
bool unlink_on_error = false;
char *filename;
char *sanitized_name;
char *c;
void *area;
int fd;
uint64_t hpagesize;
Error *local_err = NULL;
hpagesize = gethugepagesize(path, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto error;
}
block->mr->align = hpagesize;
if (memory < hpagesize) {
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
"or larger than huge page size 0x%" PRIx64,
memory, hpagesize);
goto error;
}
int64_t page_size;
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_setg(errp,
"host lacks kvm mmu notifiers, -mem-path unsupported");
goto error;
return NULL;
}
if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
/* Make name safe to use with mkstemp by replacing '/' with '_'. */
sanitized_name = g_strdup(memory_region_name(block->mr));
for (c = sanitized_name; *c != '\0'; c++) {
if (*c == '/') {
*c = '_';
}
}
filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
sanitized_name);
g_free(sanitized_name);
fd = mkstemp(filename);
for (;;) {
fd = open(path, O_RDWR);
if (fd >= 0) {
unlink(filename);
/* @path names an existing file, use it */
break;
}
g_free(filename);
} else {
fd = open(path, O_RDWR | O_CREAT, 0644);
if (errno == ENOENT) {
/* @path names a file that doesn't exist, create it */
fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd >= 0) {
unlink_on_error = true;
break;
}
} else if (errno == EISDIR) {
/* @path names a directory, create a file there */
/* Make name safe to use with mkstemp by replacing '/' with '_'. */
sanitized_name = g_strdup(memory_region_name(block->mr));
for (c = sanitized_name; *c != '\0'; c++) {
if (*c == '/') {
*c = '_';
}
}
filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
sanitized_name);
g_free(sanitized_name);
fd = mkstemp(filename);
if (fd >= 0) {
unlink(filename);
g_free(filename);
break;
}
g_free(filename);
}
if (errno != EEXIST && errno != EINTR) {
error_setg_errno(errp, errno,
"can't open backing store %s for guest RAM",
path);
goto error;
}
/*
* Try again on EINTR and EEXIST. The latter happens when
* something else creates the file between our two open().
*/
}
if (fd < 0) {
error_setg_errno(errp, errno,
"unable to create backing store for hugepages");
page_size = qemu_fd_getpagesize(fd);
block->mr->align = page_size;
if (memory < page_size) {
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
"or larger than page size 0x%" PRIx64,
memory, page_size);
goto error;
}
memory = ROUND_UP(memory, hpagesize);
memory = ROUND_UP(memory, page_size);
/*
* ftruncate is not supported by hugetlbfs in older
@@ -1304,10 +1316,10 @@ static void *file_ram_alloc(RAMBlock *block,
perror("ftruncate");
}
area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
if (area == MAP_FAILED) {
error_setg_errno(errp, errno,
"unable to map backing store for hugepages");
"unable to map backing store for guest RAM");
close(fd);
goto error;
}
@@ -1320,6 +1332,10 @@ static void *file_ram_alloc(RAMBlock *block,
return area;
error:
if (unlink_on_error) {
unlink(path);
}
close(fd);
return NULL;
}
#endif
@@ -1554,7 +1570,7 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
}
}
static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
RAMBlock *block;
RAMBlock *last_block = NULL;
@@ -1573,7 +1589,7 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
if (err) {
error_propagate(errp, err);
qemu_mutex_unlock_ramlist();
return -1;
return;
}
} else {
new_block->host = phys_mem_alloc(new_block->max_length,
@@ -1583,7 +1599,7 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
"cannot set up guest memory '%s'",
memory_region_name(new_block->mr));
qemu_mutex_unlock_ramlist();
return -1;
return;
}
memory_try_enable_merging(new_block->host, new_block->max_length);
}
@@ -1631,22 +1647,19 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
kvm_setup_guest_memory(new_block->host, new_block->max_length);
}
}
return new_block->offset;
}
#ifdef __linux__
ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
bool share, const char *mem_path,
Error **errp)
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
bool share, const char *mem_path,
Error **errp)
{
RAMBlock *new_block;
ram_addr_t addr;
Error *local_err = NULL;
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
return -1;
return NULL;
}
if (phys_mem_alloc != qemu_anon_ram_alloc) {
@@ -1657,7 +1670,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
*/
error_setg(errp,
"-mem-path not supported with this accelerator");
return -1;
return NULL;
}
size = HOST_PAGE_ALIGN(size);
@@ -1670,29 +1683,28 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
mem_path, errp);
if (!new_block->host) {
g_free(new_block);
return -1;
return NULL;
}
addr = ram_block_add(new_block, &local_err);
ram_block_add(new_block, &local_err);
if (local_err) {
g_free(new_block);
error_propagate(errp, local_err);
return -1;
return NULL;
}
return addr;
return new_block;
}
#endif
static
ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
void (*resized)(const char*,
uint64_t length,
void *host),
void *host, bool resizeable,
MemoryRegion *mr, Error **errp)
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
void (*resized)(const char*,
uint64_t length,
void *host),
void *host, bool resizeable,
MemoryRegion *mr, Error **errp)
{
RAMBlock *new_block;
ram_addr_t addr;
Error *local_err = NULL;
size = HOST_PAGE_ALIGN(size);
@@ -1711,29 +1723,27 @@ ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
if (resizeable) {
new_block->flags |= RAM_RESIZEABLE;
}
addr = ram_block_add(new_block, &local_err);
ram_block_add(new_block, &local_err);
if (local_err) {
g_free(new_block);
error_propagate(errp, local_err);
return -1;
return NULL;
}
mr->ram_block = new_block;
return addr;
return new_block;
}
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp)
{
return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}
ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
void (*resized)(const char*,
uint64_t length,
void *host),
@@ -1759,22 +1769,15 @@ static void reclaim_ramblock(RAMBlock *block)
g_free(block);
}
void qemu_ram_free(ram_addr_t addr)
void qemu_ram_free(RAMBlock *block)
{
RAMBlock *block;
qemu_mutex_lock_ramlist();
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (addr == block->offset) {
QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
/* Write list before version */
smp_wmb();
ram_list.version++;
call_rcu(block, reclaim_ramblock, rcu);
break;
}
}
QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
/* Write list before version */
smp_wmb();
ram_list.version++;
call_rcu(block, reclaim_ramblock, rcu);
qemu_mutex_unlock_ramlist();
}
@@ -2707,7 +2710,8 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
}
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(mr->ram_block, mr->ram_addr + addr1);
ptr = qemu_get_ram_ptr(mr->ram_block,
memory_region_get_ram_addr(mr) + addr1);
memcpy(buf, ptr, l);
}

View File

@@ -83,4 +83,4 @@ static void fsdev_register_config(void)
qemu_add_opts(&qemu_fsdev_opts);
qemu_add_opts(&qemu_virtfs_opts);
}
machine_init(fsdev_register_config);
opts_init(fsdev_register_config);

View File

@@ -1752,7 +1752,7 @@ int gdbserver_start(const char *device)
sigaction(SIGINT, &act, NULL);
}
#endif
chr = qemu_chr_new("gdb", device, NULL);
chr = qemu_chr_new_noreplay("gdb", device, NULL);
if (!chr)
return -1;

View File

@@ -1026,7 +1026,7 @@ ETEXI
.args_type = "",
.params = "",
.help = "Followup to a migration command to switch the migration"
" to postcopy mode. The x-postcopy-ram capability must "
" to postcopy mode. The postcopy-ram capability must "
"be set before the original migration command.",
.mhandler.cmd = hmp_migrate_start_postcopy,
},
@@ -1201,8 +1201,8 @@ ETEXI
{
.name = "drive_add",
.args_type = "pci_addr:s,opts:s",
.params = "[[<domain>:]<bus>:]<slot>\n"
.args_type = "node:-n,pci_addr:s,opts:s",
.params = "[-n] [[<domain>:]<bus>:]<slot>\n"
"[file=file][,if=type][,bus=n]\n"
"[,unit=m][,media=d][,index=i]\n"
"[,cyls=c,heads=h,secs=s[,trans=t]]\n"

View File

@@ -2,7 +2,7 @@ common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o
common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o
common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o cpu_hotplug_acpi_table.o
common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o memory_hotplug_acpi_table.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI) += acpi_interface.o
common-obj-$(CONFIG_ACPI) += bios-linker-loader.o
common-obj-$(CONFIG_ACPI) += aml-build.o

View File

@@ -258,6 +258,34 @@ static void build_append_int(GArray *table, uint64_t value)
}
}
/*
* Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword,
* and return the offset to 0x00000000 for runtime patching.
*
* Warning: runtime patching is best avoided. Only use this as
* a replacement for DataTableRegion (for guests that don't
* support it).
*/
int
build_append_named_dword(GArray *array, const char *name_format, ...)
{
int offset;
va_list ap;
build_append_byte(array, 0x08); /* NameOp */
va_start(ap, name_format);
build_append_namestringv(array, name_format, ap);
va_end(ap);
build_append_byte(array, 0x0C); /* DWordPrefix */
offset = array->len;
build_append_int_noprefix(array, 0x00000000, 4);
assert(array->len == offset + 4);
return offset;
}
static GPtrArray *alloc_list;
static Aml *aml_alloc(void)
@@ -942,14 +970,14 @@ Aml *aml_package(uint8_t num_elements)
/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefOpRegion */
Aml *aml_operation_region(const char *name, AmlRegionSpace rs,
uint32_t offset, uint32_t len)
Aml *offset, uint32_t len)
{
Aml *var = aml_alloc();
build_append_byte(var->buf, 0x5B); /* ExtOpPrefix */
build_append_byte(var->buf, 0x80); /* OpRegionOp */
build_append_namestring(var->buf, "%s", name);
build_append_byte(var->buf, rs);
build_append_int(var->buf, offset);
aml_append(var, offset);
build_append_int(var->buf, len);
return var;
}
@@ -997,6 +1025,20 @@ Aml *create_field_common(int opcode, Aml *srcbuf, Aml *index, const char *name)
return var;
}
/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefCreateField */
Aml *aml_create_field(Aml *srcbuf, Aml *bit_index, Aml *num_bits,
const char *name)
{
Aml *var = aml_alloc();
build_append_byte(var->buf, 0x5B); /* ExtOpPrefix */
build_append_byte(var->buf, 0x13); /* CreateFieldOp */
aml_append(var, srcbuf);
aml_append(var, bit_index);
aml_append(var, num_bits);
build_append_namestring(var->buf, "%s", name);
return var;
}
/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefCreateDWordField */
Aml *aml_create_dword_field(Aml *srcbuf, Aml *index, const char *name)
{
@@ -1423,6 +1465,13 @@ Aml *aml_alias(const char *source_object, const char *alias_object)
return var;
}
/* ACPI 1.0b: 16.2.5.4 Type 2 Opcodes Encoding: DefConcat */
Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target)
{
return build_opcode_2arg_dst(0x73 /* ConcatOp */, source1, source2,
target);
}
void
build_header(GArray *linker, GArray *table_data,
AcpiTableHeader *h, const char *sig, int len, uint8_t rev,

View File

@@ -26,7 +26,6 @@
#include "hw/nvram/fw_cfg.h"
#include "qemu/config-file.h"
#include "qapi/opts-visitor.h"
#include "qapi/dealloc-visitor.h"
#include "qapi-visit.h"
#include "qapi-event.h"
@@ -68,7 +67,7 @@ static void acpi_register_config(void)
qemu_add_opts(&qemu_acpi_opts);
}
machine_init(acpi_register_config);
opts_init(acpi_register_config);
static int acpi_checksum(const uint8_t *data, int len)
{
@@ -297,15 +296,7 @@ void acpi_table_add(const QemuOpts *opts, Error **errp)
out:
g_free(blob);
g_strfreev(pathnames);
if (hdrs != NULL) {
QapiDeallocVisitor *dv;
dv = qapi_dealloc_visitor_new();
visit_type_AcpiTableOptions(qapi_dealloc_get_visitor(dv), NULL, &hdrs,
NULL);
qapi_dealloc_visitor_cleanup(dv);
}
qapi_free_AcpiTableOptions(hdrs);
error_propagate(errp, err);
}

View File

@@ -29,6 +29,8 @@
#include "qemu/osdep.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/aml-build.h"
#include "hw/acpi/bios-linker-loader.h"
#include "hw/nvram/fw_cfg.h"
#include "hw/mem/nvdimm.h"
static int nvdimm_plugged_device_list(Object *obj, void *opaque)
@@ -370,15 +372,131 @@ static void nvdimm_build_nfit(GSList *device_list, GArray *table_offsets,
g_array_free(structures, true);
}
struct NvdimmDsmIn {
uint32_t handle;
uint32_t revision;
uint32_t function;
/* the remaining size in the page is used by arg3. */
union {
uint8_t arg3[0];
};
} QEMU_PACKED;
typedef struct NvdimmDsmIn NvdimmDsmIn;
struct NvdimmDsmOut {
/* the size of buffer filled by QEMU. */
uint32_t len;
uint8_t data[0];
} QEMU_PACKED;
typedef struct NvdimmDsmOut NvdimmDsmOut;
struct NvdimmDsmFunc0Out {
/* the size of buffer filled by QEMU. */
uint32_t len;
uint32_t supported_func;
} QEMU_PACKED;
typedef struct NvdimmDsmFunc0Out NvdimmDsmFunc0Out;
struct NvdimmDsmFuncNoPayloadOut {
/* the size of buffer filled by QEMU. */
uint32_t len;
uint32_t func_ret_status;
} QEMU_PACKED;
typedef struct NvdimmDsmFuncNoPayloadOut NvdimmDsmFuncNoPayloadOut;
static uint64_t
nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
{
nvdimm_debug("BUG: we never read _DSM IO Port.\n");
return 0;
}
static void
nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
{
NvdimmDsmIn *in;
hwaddr dsm_mem_addr = val;
nvdimm_debug("dsm memory address %#" HWADDR_PRIx ".\n", dsm_mem_addr);
/*
* The DSM memory is mapped to guest address space so an evil guest
* can change its content while we are doing DSM emulation. Avoid
* this by copying DSM memory to QEMU local memory.
*/
in = g_malloc(TARGET_PAGE_SIZE);
cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
le32_to_cpus(&in->revision);
le32_to_cpus(&in->function);
le32_to_cpus(&in->handle);
nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
in->handle, in->function);
/*
* function 0 is called to inquire which functions are supported by
* OSPM
*/
if (in->function == 0) {
NvdimmDsmFunc0Out func0 = {
.len = cpu_to_le32(sizeof(func0)),
/* No function supported other than function 0 */
.supported_func = cpu_to_le32(0),
};
cpu_physical_memory_write(dsm_mem_addr, &func0, sizeof func0);
} else {
/* No function except function 0 is supported yet. */
NvdimmDsmFuncNoPayloadOut out = {
.len = cpu_to_le32(sizeof(out)),
.func_ret_status = cpu_to_le32(1) /* Not Supported */,
};
cpu_physical_memory_write(dsm_mem_addr, &out, sizeof(out));
}
g_free(in);
}
static const MemoryRegionOps nvdimm_dsm_ops = {
.read = nvdimm_dsm_read,
.write = nvdimm_dsm_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
},
};
void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io,
FWCfgState *fw_cfg, Object *owner)
{
memory_region_init_io(&state->io_mr, owner, &nvdimm_dsm_ops, state,
"nvdimm-acpi-io", NVDIMM_ACPI_IO_LEN);
memory_region_add_subregion(io, NVDIMM_ACPI_IO_BASE, &state->io_mr);
state->dsm_mem = g_array_new(false, true /* clear */, 1);
acpi_data_push(state->dsm_mem, TARGET_PAGE_SIZE);
fw_cfg_add_file(fw_cfg, NVDIMM_DSM_MEM_FILE, state->dsm_mem->data,
state->dsm_mem->len);
}
#define NVDIMM_COMMON_DSM "NCAL"
#define NVDIMM_ACPI_MEM_ADDR "MEMA"
static void nvdimm_build_common_dsm(Aml *dev)
{
Aml *method, *ifctx, *function;
Aml *method, *ifctx, *function, *dsm_mem, *unpatched, *result_size;
uint8_t byte_list[1];
method = aml_method(NVDIMM_COMMON_DSM, 4, AML_NOTSERIALIZED);
method = aml_method(NVDIMM_COMMON_DSM, 4, AML_SERIALIZED);
function = aml_arg(2);
dsm_mem = aml_name(NVDIMM_ACPI_MEM_ADDR);
/*
* do not support any method if DSM memory address has not been
* patched.
*/
unpatched = aml_if(aml_equal(dsm_mem, aml_int(0x0)));
/*
* function 0 is called to inquire what functions are supported by
@@ -387,12 +505,38 @@ static void nvdimm_build_common_dsm(Aml *dev)
ifctx = aml_if(aml_equal(function, aml_int(0)));
byte_list[0] = 0 /* No function Supported */;
aml_append(ifctx, aml_return(aml_buffer(1, byte_list)));
aml_append(method, ifctx);
aml_append(unpatched, ifctx);
/* No function is supported yet. */
byte_list[0] = 1 /* Not Supported */;
aml_append(method, aml_return(aml_buffer(1, byte_list)));
aml_append(unpatched, aml_return(aml_buffer(1, byte_list)));
aml_append(method, unpatched);
/*
* The HDLE indicates the DSM function is issued from which device,
* it is not used at this time as no function is supported yet.
* Currently we make it always be 0 for all the devices and will set
* the appropriate value once real function is implemented.
*/
aml_append(method, aml_store(aml_int(0x0), aml_name("HDLE")));
aml_append(method, aml_store(aml_arg(1), aml_name("REVS")));
aml_append(method, aml_store(aml_arg(2), aml_name("FUNC")));
/*
* tell QEMU about the real address of DSM memory, then QEMU
* gets the control and fills the result in DSM memory.
*/
aml_append(method, aml_store(dsm_mem, aml_name("NTFI")));
result_size = aml_local(1);
aml_append(method, aml_store(aml_name("RLEN"), result_size));
aml_append(method, aml_store(aml_shiftleft(result_size, aml_int(3)),
result_size));
aml_append(method, aml_create_field(aml_name("ODAT"), aml_int(0),
result_size, "OBUF"));
aml_append(method, aml_concatenate(aml_buffer(0, NULL), aml_name("OBUF"),
aml_arg(6)));
aml_append(method, aml_return(aml_arg(6)));
aml_append(dev, method);
}
@@ -435,7 +579,8 @@ static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev)
static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
GArray *table_data, GArray *linker)
{
Aml *ssdt, *sb_scope, *dev;
Aml *ssdt, *sb_scope, *dev, *field;
int mem_addr_offset, nvdimm_ssdt;
acpi_add_table(table_offsets, table_data);
@@ -459,19 +604,89 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
*/
aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0012")));
/* map DSM memory and IO into ACPI namespace. */
aml_append(dev, aml_operation_region("NPIO", AML_SYSTEM_IO,
aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN));
aml_append(dev, aml_operation_region("NRAM", AML_SYSTEM_MEMORY,
aml_name(NVDIMM_ACPI_MEM_ADDR), TARGET_PAGE_SIZE));
/*
* DSM notifier:
* NTFI: write the address of DSM memory and notify QEMU to emulate
* the access.
*
* It is the IO port so that accessing them will cause VM-exit, the
* control will be transferred to QEMU.
*/
field = aml_field("NPIO", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("NTFI",
sizeof(uint32_t) * BITS_PER_BYTE));
aml_append(dev, field);
/*
* DSM input:
* HDLE: store device's handle, it's zero if the _DSM call happens
* on NVDIMM Root Device.
* REVS: store the Arg1 of _DSM call.
* FUNC: store the Arg2 of _DSM call.
* ARG3: store the Arg3 of _DSM call.
*
* They are RAM mapping on host so that these accesses never cause
* VM-EXIT.
*/
field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("HDLE",
sizeof(typeof_field(NvdimmDsmIn, handle)) * BITS_PER_BYTE));
aml_append(field, aml_named_field("REVS",
sizeof(typeof_field(NvdimmDsmIn, revision)) * BITS_PER_BYTE));
aml_append(field, aml_named_field("FUNC",
sizeof(typeof_field(NvdimmDsmIn, function)) * BITS_PER_BYTE));
aml_append(field, aml_named_field("ARG3",
(TARGET_PAGE_SIZE - offsetof(NvdimmDsmIn, arg3)) *
BITS_PER_BYTE));
aml_append(dev, field);
/*
* DSM output:
* RLEN: the size of the buffer filled by QEMU.
* ODAT: the buffer QEMU uses to store the result.
*
* Since the page is reused by both input and out, the input data
* will be lost after storing new result into ODAT so we should fetch
* all the input data before writing the result.
*/
field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("RLEN",
sizeof(typeof_field(NvdimmDsmOut, len)) * BITS_PER_BYTE));
aml_append(field, aml_named_field("ODAT",
(TARGET_PAGE_SIZE - offsetof(NvdimmDsmOut, data)) *
BITS_PER_BYTE));
aml_append(dev, field);
nvdimm_build_common_dsm(dev);
nvdimm_build_device_dsm(dev);
nvdimm_build_nvdimm_devices(device_list, dev);
aml_append(sb_scope, dev);
aml_append(ssdt, sb_scope);
nvdimm_ssdt = table_data->len;
/* copy AML table into ACPI tables blob and patch header there */
g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
mem_addr_offset = build_append_named_dword(table_data,
NVDIMM_ACPI_MEM_ADDR);
bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
false /* high memory */);
bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
NVDIMM_DSM_MEM_FILE, table_data,
table_data->data + mem_addr_offset,
sizeof(uint32_t));
build_header(linker, table_data,
(void *)(table_data->data + table_data->len - ssdt->buf->len),
"SSDT", ssdt->buf->len, 1, NULL, "NVDIMM");
(void *)(table_data->data + nvdimm_ssdt),
"SSDT", table_data->len - nvdimm_ssdt, 1, NULL, "NVDIMM");
free_aml_allocator();
}

View File

@@ -111,7 +111,7 @@ static void clipper_init(MachineState *machine)
}
size = load_elf(palcode_filename, cpu_alpha_superpage_to_phys,
NULL, &palcode_entry, &palcode_low, &palcode_high,
0, EM_ALPHA, 0);
0, EM_ALPHA, 0, 0);
if (size < 0) {
error_report("could not load palcode '%s'", palcode_filename);
exit(1);
@@ -131,7 +131,7 @@ static void clipper_init(MachineState *machine)
size = load_elf(kernel_filename, cpu_alpha_superpage_to_phys,
NULL, &kernel_entry, &kernel_low, &kernel_high,
0, EM_ALPHA, 0);
0, EM_ALPHA, 0, 0);
if (size < 0) {
error_report("could not load kernel '%s'", kernel_filename);
exit(1);

View File

@@ -16,3 +16,4 @@ obj-$(CONFIG_STM32F205_SOC) += stm32f205_soc.o
obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-zynqmp.o xlnx-ep108.o
obj-$(CONFIG_FSL_IMX25) += fsl-imx25.o imx25_pdk.o
obj-$(CONFIG_FSL_IMX31) += fsl-imx31.o kzm.o
obj-$(CONFIG_ASPEED_SOC) += ast2400.o palmetto-bmc.o

View File

@@ -211,7 +211,7 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
if (kernel_filename) {
image_size = load_elf(kernel_filename, NULL, NULL, &entry, &lowaddr,
NULL, big_endian, EM_ARM, 1);
NULL, big_endian, EM_ARM, 1, 0);
if (image_size < 0) {
image_size = load_image_targphys(kernel_filename, 0, mem_size);
lowaddr = 0;

137
hw/arm/ast2400.c Normal file
View File

@@ -0,0 +1,137 @@
/*
* AST2400 SoC
*
* Andrew Jeffery <andrew@aj.id.au>
* Jeremy Kerr <jk@ozlabs.org>
*
* Copyright 2016 IBM Corp.
*
* This code is licensed under the GPL version 2 or later. See
* the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "exec/address-spaces.h"
#include "hw/arm/ast2400.h"
#include "hw/char/serial.h"
#define AST2400_UART_5_BASE 0x00184000
#define AST2400_IOMEM_SIZE 0x00200000
#define AST2400_IOMEM_BASE 0x1E600000
#define AST2400_VIC_BASE 0x1E6C0000
#define AST2400_TIMER_BASE 0x1E782000
static const int uart_irqs[] = { 9, 32, 33, 34, 10 };
static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, };
/*
* IO handlers: simply catch any reads/writes to IO addresses that aren't
* handled by a device mapping.
*/
static uint64_t ast2400_io_read(void *p, hwaddr offset, unsigned size)
{
qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n",
__func__, offset, size);
return 0;
}
static void ast2400_io_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n",
__func__, offset, value, size);
}
static const MemoryRegionOps ast2400_io_ops = {
.read = ast2400_io_read,
.write = ast2400_io_write,
.endianness = DEVICE_LITTLE_ENDIAN,
};
static void ast2400_init(Object *obj)
{
AST2400State *s = AST2400(obj);
s->cpu = cpu_arm_init("arm926");
object_initialize(&s->vic, sizeof(s->vic), TYPE_ASPEED_VIC);
object_property_add_child(obj, "vic", OBJECT(&s->vic), NULL);
qdev_set_parent_bus(DEVICE(&s->vic), sysbus_get_default());
object_initialize(&s->timerctrl, sizeof(s->timerctrl), TYPE_ASPEED_TIMER);
object_property_add_child(obj, "timerctrl", OBJECT(&s->timerctrl), NULL);
qdev_set_parent_bus(DEVICE(&s->timerctrl), sysbus_get_default());
}
static void ast2400_realize(DeviceState *dev, Error **errp)
{
int i;
AST2400State *s = AST2400(dev);
Error *err = NULL;
/* IO space */
memory_region_init_io(&s->iomem, NULL, &ast2400_io_ops, NULL,
"ast2400.io", AST2400_IOMEM_SIZE);
memory_region_add_subregion_overlap(get_system_memory(), AST2400_IOMEM_BASE,
&s->iomem, -1);
/* VIC */
object_property_set_bool(OBJECT(&s->vic), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->vic), 0, AST2400_VIC_BASE);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 0,
qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 1,
qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_FIQ));
/* Timer */
object_property_set_bool(OBJECT(&s->timerctrl), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->timerctrl), 0, AST2400_TIMER_BASE);
for (i = 0; i < ARRAY_SIZE(timer_irqs); i++) {
qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->vic), timer_irqs[i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq);
}
/* UART - attach an 8250 to the IO space as our UART5 */
if (serial_hds[0]) {
qemu_irq uart5 = qdev_get_gpio_in(DEVICE(&s->vic), uart_irqs[4]);
serial_mm_init(&s->iomem, AST2400_UART_5_BASE, 2,
uart5, 38400, serial_hds[0], DEVICE_LITTLE_ENDIAN);
}
}
static void ast2400_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
dc->realize = ast2400_realize;
/*
* Reason: creates an ARM CPU, thus use after free(), see
* arm_cpu_class_init()
*/
dc->cannot_destroy_with_object_finalize_yet = true;
}
static const TypeInfo ast2400_type_info = {
.name = TYPE_AST2400,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(AST2400State),
.instance_init = ast2400_init,
.class_init = ast2400_class_init,
};
static void ast2400_register_types(void)
{
type_register_static(&ast2400_type_info);
}
type_init(ast2400_register_types)

View File

@@ -12,6 +12,7 @@
#include "hw/arm/bcm2835_peripherals.h"
#include "hw/misc/bcm2835_mbox_defs.h"
#include "hw/arm/raspi_platform.h"
#include "sysemu/char.h"
/* Peripheral base address on the VC (GPU) system bus */
#define BCM2835_VC_PERI_BASE 0x7e000000
@@ -48,6 +49,11 @@ static void bcm2835_peripherals_init(Object *obj)
object_property_add_child(obj, "uart0", OBJECT(s->uart0), NULL);
qdev_set_parent_bus(DEVICE(s->uart0), sysbus_get_default());
/* AUX / UART1 */
object_initialize(&s->aux, sizeof(s->aux), TYPE_BCM2835_AUX);
object_property_add_child(obj, "aux", OBJECT(&s->aux), NULL);
qdev_set_parent_bus(DEVICE(&s->aux), sysbus_get_default());
/* Mailboxes */
object_initialize(&s->mboxes, sizeof(s->mboxes), TYPE_BCM2835_MBOX);
object_property_add_child(obj, "mbox", OBJECT(&s->mboxes), NULL);
@@ -56,6 +62,16 @@ static void bcm2835_peripherals_init(Object *obj)
object_property_add_const_link(OBJECT(&s->mboxes), "mbox-mr",
OBJECT(&s->mbox_mr), &error_abort);
/* Framebuffer */
object_initialize(&s->fb, sizeof(s->fb), TYPE_BCM2835_FB);
object_property_add_child(obj, "fb", OBJECT(&s->fb), NULL);
object_property_add_alias(obj, "vcram-size", OBJECT(&s->fb), "vcram-size",
&error_abort);
qdev_set_parent_bus(DEVICE(&s->fb), sysbus_get_default());
object_property_add_const_link(OBJECT(&s->fb), "dma-mr",
OBJECT(&s->gpu_bus_mr), &error_abort);
/* Property channel */
object_initialize(&s->property, sizeof(s->property), TYPE_BCM2835_PROPERTY);
object_property_add_child(obj, "property", OBJECT(&s->property), NULL);
@@ -63,6 +79,8 @@ static void bcm2835_peripherals_init(Object *obj)
"board-rev", &error_abort);
qdev_set_parent_bus(DEVICE(&s->property), sysbus_get_default());
object_property_add_const_link(OBJECT(&s->property), "fb",
OBJECT(&s->fb), &error_abort);
object_property_add_const_link(OBJECT(&s->property), "dma-mr",
OBJECT(&s->gpu_bus_mr), &error_abort);
@@ -70,6 +88,14 @@ static void bcm2835_peripherals_init(Object *obj)
object_initialize(&s->sdhci, sizeof(s->sdhci), TYPE_SYSBUS_SDHCI);
object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL);
qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default());
/* DMA Channels */
object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA);
object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL);
qdev_set_parent_bus(DEVICE(&s->dma), sysbus_get_default());
object_property_add_const_link(OBJECT(&s->dma), "dma-mr",
OBJECT(&s->gpu_bus_mr), &error_abort);
}
static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -78,7 +104,8 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
Object *obj;
MemoryRegion *ram;
Error *err = NULL;
uint32_t ram_size;
uint32_t ram_size, vcram_size;
CharDriverState *chr;
int n;
obj = object_property_get_link(OBJECT(dev), "ram", &err);
@@ -131,6 +158,29 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
INTERRUPT_UART));
/* AUX / UART1 */
/* TODO: don't call qemu_char_get_next_serial() here, instead set
* chardev properties for each uart at the board level, once pl011
* (uart0) has been updated to avoid qemu_char_get_next_serial()
*/
chr = qemu_char_get_next_serial();
if (chr == NULL) {
chr = qemu_chr_new("bcm2835.uart1", "null", NULL);
}
qdev_prop_set_chr(DEVICE(&s->aux), "chardev", chr);
object_property_set_bool(OBJECT(&s->aux), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
memory_region_add_subregion(&s->peri_mr, UART1_OFFSET,
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->aux), 0));
sysbus_connect_irq(SYS_BUS_DEVICE(&s->aux), 0,
qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
INTERRUPT_AUX));
/* Mailboxes */
object_property_set_bool(OBJECT(&s->mboxes), true, "realized", &err);
if (err) {
@@ -144,13 +194,33 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_ARM_IRQ,
INTERRUPT_ARM_MAILBOX));
/* Property channel */
object_property_set_int(OBJECT(&s->property), ram_size, "ram-size", &err);
/* Framebuffer */
vcram_size = (uint32_t)object_property_get_int(OBJECT(s), "vcram-size",
&err);
if (err) {
error_propagate(errp, err);
return;
}
object_property_set_int(OBJECT(&s->fb), ram_size - vcram_size,
"vcram-base", &err);
if (err) {
error_propagate(errp, err);
return;
}
object_property_set_bool(OBJECT(&s->fb), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
memory_region_add_subregion(&s->mbox_mr, MBOX_CHAN_FB << MBOX_AS_CHAN_SHIFT,
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->fb), 0));
sysbus_connect_irq(SYS_BUS_DEVICE(&s->fb), 0,
qdev_get_gpio_in(DEVICE(&s->mboxes), MBOX_CHAN_FB));
/* Property channel */
object_property_set_bool(OBJECT(&s->property), true, "realized", &err);
if (err) {
error_propagate(errp, err);
@@ -171,6 +241,13 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
return;
}
object_property_set_bool(OBJECT(&s->sdhci), true, "pending-insert-quirk",
&err);
if (err) {
error_propagate(errp, err);
return;
}
object_property_set_bool(OBJECT(&s->sdhci), true, "realized", &err);
if (err) {
error_propagate(errp, err);
@@ -189,6 +266,24 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
return;
}
/* DMA Channels */
object_property_set_bool(OBJECT(&s->dma), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
memory_region_add_subregion(&s->peri_mr, DMA_OFFSET,
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dma), 0));
memory_region_add_subregion(&s->peri_mr, DMA15_OFFSET,
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dma), 1));
for (n = 0; n <= 12; n++) {
sysbus_connect_irq(SYS_BUS_DEVICE(&s->dma), n,
qdev_get_gpio_in_named(DEVICE(&s->ic),
BCM2835_IC_GPU_IRQ,
INTERRUPT_DMA0 + n));
}
}
static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data)
@@ -196,6 +291,8 @@ static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data)
DeviceClass *dc = DEVICE_CLASS(oc);
dc->realize = bcm2835_peripherals_realize;
/* Reason: realize() method uses qemu_char_get_next_serial() */
dc->cannot_instantiate_with_device_add_yet = true;
}
static const TypeInfo bcm2835_peripherals_type_info = {

View File

@@ -42,6 +42,8 @@ static void bcm2836_init(Object *obj)
&error_abort);
object_property_add_alias(obj, "board-rev", OBJECT(&s->peripherals),
"board-rev", &error_abort);
object_property_add_alias(obj, "vcram-size", OBJECT(&s->peripherals),
"vcram-size", &error_abort);
qdev_set_parent_bus(DEVICE(&s->peripherals), sysbus_get_default());
}

View File

@@ -518,9 +518,34 @@ static void do_cpu_reset(void *opaque)
cpu_reset(cs);
if (info) {
if (!info->is_linux) {
int i;
/* Jump to the entry point. */
uint64_t entry = info->entry;
switch (info->endianness) {
case ARM_ENDIANNESS_LE:
env->cp15.sctlr_el[1] &= ~SCTLR_E0E;
for (i = 1; i < 4; ++i) {
env->cp15.sctlr_el[i] &= ~SCTLR_EE;
}
env->uncached_cpsr &= ~CPSR_E;
break;
case ARM_ENDIANNESS_BE8:
env->cp15.sctlr_el[1] |= SCTLR_E0E;
for (i = 1; i < 4; ++i) {
env->cp15.sctlr_el[i] |= SCTLR_EE;
}
env->uncached_cpsr |= CPSR_E;
break;
case ARM_ENDIANNESS_BE32:
env->cp15.sctlr_el[1] |= SCTLR_B;
break;
case ARM_ENDIANNESS_UNKNOWN:
break; /* Board's decision */
default:
g_assert_not_reached();
}
if (!env->aarch64) {
env->thumb = info->entry & 1;
entry &= 0xfffffffe;
@@ -638,6 +663,62 @@ static int do_arm_linux_init(Object *obj, void *opaque)
return 0;
}
static uint64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
uint64_t *lowaddr, uint64_t *highaddr,
int elf_machine)
{
bool elf_is64;
union {
Elf32_Ehdr h32;
Elf64_Ehdr h64;
} elf_header;
int data_swab = 0;
bool big_endian;
uint64_t ret = -1;
Error *err = NULL;
load_elf_hdr(info->kernel_filename, &elf_header, &elf_is64, &err);
if (err) {
return ret;
}
if (elf_is64) {
big_endian = elf_header.h64.e_ident[EI_DATA] == ELFDATA2MSB;
info->endianness = big_endian ? ARM_ENDIANNESS_BE8
: ARM_ENDIANNESS_LE;
} else {
big_endian = elf_header.h32.e_ident[EI_DATA] == ELFDATA2MSB;
if (big_endian) {
if (bswap32(elf_header.h32.e_flags) & EF_ARM_BE8) {
info->endianness = ARM_ENDIANNESS_BE8;
} else {
info->endianness = ARM_ENDIANNESS_BE32;
/* In BE32, the CPU has a different view of the per-byte
* address map than the rest of the system. BE32 ELF files
* are organised such that they can be programmed through
* the CPU's per-word byte-reversed view of the world. QEMU
* however loads ELF files independently of the CPU. So
* tell the ELF loader to byte reverse the data for us.
*/
data_swab = 2;
}
} else {
info->endianness = ARM_ENDIANNESS_LE;
}
}
ret = load_elf(info->kernel_filename, NULL, NULL,
pentry, lowaddr, highaddr, big_endian, elf_machine,
1, data_swab);
if (ret <= 0) {
/* The header loaded but the image didn't */
exit(1);
}
return ret;
}
static void arm_load_kernel_notify(Notifier *notifier, void *data)
{
CPUState *cs;
@@ -647,7 +728,6 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
uint64_t elf_entry, elf_low_addr, elf_high_addr;
int elf_machine;
hwaddr entry, kernel_load_offset;
int big_endian;
static const ARMInsnFixup *primary_loader;
ArmLoadKernelNotifier *n = DO_UPCAST(ArmLoadKernelNotifier,
notifier, notifier);
@@ -733,12 +813,6 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
if (info->nb_cpus == 0)
info->nb_cpus = 1;
#ifdef TARGET_WORDS_BIGENDIAN
big_endian = 1;
#else
big_endian = 0;
#endif
/* We want to put the initrd far enough into RAM that when the
* kernel is uncompressed it will not clobber the initrd. However
* on boards without much RAM we must ensure that we still leave
@@ -753,9 +827,8 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
MIN(info->ram_size / 2, 128 * 1024 * 1024);
/* Assume that raw images are linux kernels, and ELF images are not. */
kernel_size = load_elf(info->kernel_filename, NULL, NULL, &elf_entry,
&elf_low_addr, &elf_high_addr, big_endian,
elf_machine, 1);
kernel_size = arm_load_elf(info, &elf_entry, &elf_low_addr,
&elf_high_addr, elf_machine);
if (kernel_size > 0 && have_dtb(info)) {
/* If there is still some room left at the base of RAM, try and put
* the DTB there like we do for images loaded with -bios or -pflash.

View File

@@ -181,4 +181,4 @@ static void exynos4_machines_init(void)
type_register_static(&smdkc210_type);
}
machine_init(exynos4_machines_init)
type_init(exynos4_machines_init)

View File

@@ -291,6 +291,7 @@ static void fsl_imx25_class_init(ObjectClass *oc, void *data)
* arm_cpu_class_init()
*/
dc->cannot_destroy_with_object_finalize_yet = true;
dc->desc = "i.MX25 SOC";
}
static const TypeInfo fsl_imx25_type_info = {

View File

@@ -265,6 +265,7 @@ static void fsl_imx31_class_init(ObjectClass *oc, void *data)
* arm_cpu_class_init()
*/
dc->cannot_destroy_with_object_finalize_yet = true;
dc->desc = "i.MX31 SOC";
}
static const TypeInfo fsl_imx31_type_info = {

View File

@@ -156,4 +156,4 @@ static void gumstix_machine_init(void)
type_register_static(&verdex_type);
}
machine_init(gumstix_machine_init)
type_init(gumstix_machine_init)

View File

@@ -437,4 +437,4 @@ static void calxeda_machines_init(void)
type_register_static(&midway_type);
}
machine_init(calxeda_machines_init)
type_init(calxeda_machines_init)

View File

@@ -1450,4 +1450,4 @@ static void nseries_machine_init(void)
type_register_static(&n810_type);
}
machine_init(nseries_machine_init)
type_init(nseries_machine_init)

View File

@@ -252,4 +252,4 @@ static void sx1_machine_init(void)
type_register_static(&sx1_machine_v2_type);
}
machine_init(sx1_machine_init)
type_init(sx1_machine_init)

65
hw/arm/palmetto-bmc.c Normal file
View File

@@ -0,0 +1,65 @@
/*
* OpenPOWER Palmetto BMC
*
* Andrew Jeffery <andrew@aj.id.au>
*
* Copyright 2016 IBM Corp.
*
* This code is licensed under the GPL version 2 or later. See
* the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "exec/address-spaces.h"
#include "hw/arm/arm.h"
#include "hw/arm/ast2400.h"
#include "hw/boards.h"
static struct arm_boot_info palmetto_bmc_binfo = {
.loader_start = AST2400_SDRAM_BASE,
.board_id = 0,
.nb_cpus = 1,
};
typedef struct PalmettoBMCState {
AST2400State soc;
MemoryRegion ram;
} PalmettoBMCState;
static void palmetto_bmc_init(MachineState *machine)
{
PalmettoBMCState *bmc;
bmc = g_new0(PalmettoBMCState, 1);
object_initialize(&bmc->soc, (sizeof(bmc->soc)), TYPE_AST2400);
object_property_add_child(OBJECT(machine), "soc", OBJECT(&bmc->soc),
&error_abort);
memory_region_allocate_system_memory(&bmc->ram, NULL, "ram", ram_size);
memory_region_add_subregion(get_system_memory(), AST2400_SDRAM_BASE,
&bmc->ram);
object_property_add_const_link(OBJECT(&bmc->soc), "ram", OBJECT(&bmc->ram),
&error_abort);
object_property_set_bool(OBJECT(&bmc->soc), true, "realized",
&error_abort);
palmetto_bmc_binfo.kernel_filename = machine->kernel_filename;
palmetto_bmc_binfo.initrd_filename = machine->initrd_filename;
palmetto_bmc_binfo.kernel_cmdline = machine->kernel_cmdline;
palmetto_bmc_binfo.ram_size = ram_size;
arm_load_kernel(ARM_CPU(first_cpu), &palmetto_bmc_binfo);
}
static void palmetto_bmc_machine_init(MachineClass *mc)
{
mc->desc = "OpenPOWER Palmetto BMC";
mc->init = palmetto_bmc_init;
mc->max_cpus = 1;
mc->no_sdcard = 1;
mc->no_floppy = 1;
mc->no_cdrom = 1;
mc->no_sdcard = 1;
mc->no_parallel = 1;
}
DEFINE_MACHINE("palmetto-bmc", palmetto_bmc_machine_init);

View File

@@ -113,6 +113,7 @@ static void setup_boot(MachineState *machine, int version, size_t ram_size)
static void raspi2_init(MachineState *machine)
{
RasPiState *s = g_new0(RasPiState, 1);
uint32_t vcram_size;
DriveInfo *di;
BlockBackend *blk;
BusState *bus;
@@ -149,7 +150,9 @@ static void raspi2_init(MachineState *machine)
qdev_prop_set_drive(carddev, "drive", blk, &error_fatal);
object_property_set_bool(OBJECT(carddev), true, "realized", &error_fatal);
setup_boot(machine, 2, machine->ram_size);
vcram_size = object_property_get_int(OBJECT(&s->soc), "vcram-size",
&error_abort);
setup_boot(machine, 2, machine->ram_size - vcram_size);
}
static void raspi2_machine_init(MachineClass *mc)
@@ -161,11 +164,6 @@ static void raspi2_machine_init(MachineClass *mc)
mc->no_floppy = 1;
mc->no_cdrom = 1;
mc->max_cpus = BCM2836_NCPUS;
/* XXX: Temporary restriction in RAM size from the full 1GB. Since
* we do not yet support the framebuffer / GPU, we need to limit
* RAM usable by the OS to sit below the peripherals.
*/
mc->default_ram_size = 0x3F000000; /* BCM2836_PERI_BASE */
mc->default_ram_size = 1024 * 1024 * 1024;
};
DEFINE_MACHINE("raspi2", raspi2_machine_init)

View File

@@ -457,4 +457,4 @@ static void realview_machine_init(void)
type_register_static(&realview_pbx_a9_type);
}
machine_init(realview_machine_init)
type_init(realview_machine_init)

View File

@@ -1037,7 +1037,7 @@ static void spitz_machine_init(void)
type_register_static(&terrierpda_type);
}
machine_init(spitz_machine_init)
type_init(spitz_machine_init)
static bool is_version_0(void *opaque, int version_id)
{

View File

@@ -1420,7 +1420,7 @@ static void stellaris_machine_init(void)
type_register_static(&lm3s6965evb_type);
}
machine_init(stellaris_machine_init)
type_init(stellaris_machine_init)
static void stellaris_i2c_class_init(ObjectClass *klass, void *data)
{

View File

@@ -240,7 +240,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque)
mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
reg_attr[2 * i] = cpu_to_be32(mmio_base);
reg_attr[2 * i + 1] = cpu_to_be32(
memory_region_size(&vdev->regions[i]->mem));
memory_region_size(vdev->regions[i]->mem));
}
qemu_fdt_setprop(fdt, nodename, "reg", reg_attr,
vbasedev->num_regions * 2 * sizeof(uint32_t));
@@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
reg_attr[2 * i] = cpu_to_be32(mmio_base);
reg_attr[2 * i + 1] = cpu_to_be32(
memory_region_size(&vdev->regions[i]->mem));
memory_region_size(vdev->regions[i]->mem));
}
qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr,
vbasedev->num_regions * 2 * sizeof(uint32_t));

View File

@@ -419,7 +419,7 @@ static void versatile_machine_init(void)
type_register_static(&versatileab_type);
}
machine_init(versatile_machine_init)
type_init(versatile_machine_init)
static void vpb_sic_class_init(ObjectClass *klass, void *data)
{

View File

@@ -800,4 +800,4 @@ static void vexpress_machine_init(void)
type_register_static(&vexpress_a15_info);
}
machine_init(vexpress_machine_init);
type_init(vexpress_machine_init);

View File

@@ -81,6 +81,20 @@ static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap,
aml_append(scope, dev);
}
static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)
{
Aml *dev = aml_device("FWCF");
aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
/* device present, functioning, decoding, not shown in UI */
aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
Aml *crs = aml_resource_template();
aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base,
fw_cfg_memmap->size, AML_READ_WRITE));
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
}
static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
{
Aml *dev, *crs;
@@ -549,6 +563,7 @@ build_dsdt(GArray *table_data, GArray *linker, VirtGuestInfo *guest_info)
acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]);
acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]);
acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
(irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS);
acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE),

View File

@@ -73,6 +73,7 @@ typedef struct VirtBoardInfo {
uint32_t clock_phandle;
uint32_t gic_phandle;
uint32_t v2m_phandle;
bool using_psci;
} VirtBoardInfo;
typedef struct {
@@ -95,6 +96,23 @@ typedef struct {
#define VIRT_MACHINE_CLASS(klass) \
OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE)
/* RAM limit in GB. Since VIRT_MEM starts at the 1GB mark, this means
* RAM can go up to the 256GB mark, leaving 256GB of the physical
* address space unallocated and free for future use between 256G and 512G.
* If we need to provide more RAM to VMs in the future then we need to:
* * allocate a second bank of RAM starting at 2TB and working up
* * fix the DT and ACPI table generation code in QEMU to correctly
* report two split lumps of RAM to the guest
* * fix KVM in the host kernel to allow guests with >40 bit address spaces
* (We don't want to fill all the way up to 512GB with RAM because
* we might want it for non-RAM purposes later. Conversely it seems
* reasonable to assume that anybody configuring a VM with a quarter
* of a terabyte of RAM will be doing it on a host with more than a
* terabyte of physical address space.)
*/
#define RAMLIMIT_GB 255
#define RAMLIMIT_BYTES (RAMLIMIT_GB * 1024ULL * 1024 * 1024)
/* Addresses and sizes of our components.
* 0..128MB is space for a flash device so we can run bootrom code such as UEFI.
* 128MB..256MB is used for miscellaneous device I/O.
@@ -127,10 +145,11 @@ static const MemMapEntry a15memmap[] = {
[VIRT_MMIO] = { 0x0a000000, 0x00000200 },
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
[VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
[VIRT_PCIE_MMIO] = { 0x10000000, 0x2eff0000 },
[VIRT_PCIE_PIO] = { 0x3eff0000, 0x00010000 },
[VIRT_PCIE_ECAM] = { 0x3f000000, 0x01000000 },
[VIRT_MEM] = { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
[VIRT_MEM] = { 0x40000000, RAMLIMIT_BYTES },
/* Second PCIe window, 512GB wide at the 512GB boundary */
[VIRT_PCIE_MMIO_HIGH] = { 0x8000000000ULL, 0x8000000000ULL },
};
@@ -230,6 +249,10 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)
void *fdt = vbi->fdt;
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
if (!vbi->using_psci) {
return;
}
qemu_fdt_add_subnode(fdt, "/psci");
if (armcpu->psci_version == 2) {
const char comp[] = "arm,psci-0.2\0arm,psci";
@@ -341,7 +364,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible",
armcpu->dtb_compatible);
if (vbi->smp_cpus > 1) {
if (vbi->using_psci && vbi->smp_cpus > 1) {
qemu_fdt_setprop_string(vbi->fdt, nodename,
"enable-method", "psci");
}
@@ -678,13 +701,15 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
}
static void create_one_flash(const char *name, hwaddr flashbase,
hwaddr flashsize)
hwaddr flashsize, const char *file,
MemoryRegion *sysmem)
{
/* Create and map a single flash device. We use the same
* parameters as the flash devices on the Versatile Express board.
*/
DriveInfo *dinfo = drive_get_next(IF_PFLASH);
DeviceState *dev = qdev_create(NULL, "cfi.pflash01");
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
const uint64_t sectorlength = 256 * 1024;
if (dinfo) {
@@ -704,19 +729,10 @@ static void create_one_flash(const char *name, hwaddr flashbase,
qdev_prop_set_string(dev, "name", name);
qdev_init_nofail(dev);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, flashbase);
}
memory_region_add_subregion(sysmem, flashbase,
sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0));
static void create_flash(const VirtBoardInfo *vbi)
{
/* Create two flash devices to fill the VIRT_FLASH space in the memmap.
* Any file passed via -bios goes in the first of these.
*/
hwaddr flashsize = vbi->memmap[VIRT_FLASH].size / 2;
hwaddr flashbase = vbi->memmap[VIRT_FLASH].base;
char *nodename;
if (bios_name) {
if (file) {
char *fn;
int image_size;
@@ -726,30 +742,73 @@ static void create_flash(const VirtBoardInfo *vbi)
"but you cannot use both options at once");
exit(1);
}
fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, file);
if (!fn) {
error_report("Could not find ROM image '%s'", bios_name);
error_report("Could not find ROM image '%s'", file);
exit(1);
}
image_size = load_image_targphys(fn, flashbase, flashsize);
image_size = load_image_mr(fn, sysbus_mmio_get_region(sbd, 0));
g_free(fn);
if (image_size < 0) {
error_report("Could not load ROM image '%s'", bios_name);
error_report("Could not load ROM image '%s'", file);
exit(1);
}
}
}
create_one_flash("virt.flash0", flashbase, flashsize);
create_one_flash("virt.flash1", flashbase + flashsize, flashsize);
static void create_flash(const VirtBoardInfo *vbi,
MemoryRegion *sysmem,
MemoryRegion *secure_sysmem)
{
/* Create two flash devices to fill the VIRT_FLASH space in the memmap.
* Any file passed via -bios goes in the first of these.
* sysmem is the system memory space. secure_sysmem is the secure view
* of the system, and the first flash device should be made visible only
* there. The second flash device is visible to both secure and nonsecure.
* If sysmem == secure_sysmem this means there is no separate Secure
* address space and both flash devices are generally visible.
*/
hwaddr flashsize = vbi->memmap[VIRT_FLASH].size / 2;
hwaddr flashbase = vbi->memmap[VIRT_FLASH].base;
char *nodename;
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
qemu_fdt_add_subnode(vbi->fdt, nodename);
qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
2, flashbase, 2, flashsize,
2, flashbase + flashsize, 2, flashsize);
qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
g_free(nodename);
create_one_flash("virt.flash0", flashbase, flashsize,
bios_name, secure_sysmem);
create_one_flash("virt.flash1", flashbase + flashsize, flashsize,
NULL, sysmem);
if (sysmem == secure_sysmem) {
/* Report both flash devices as a single node in the DT */
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
qemu_fdt_add_subnode(vbi->fdt, nodename);
qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
2, flashbase, 2, flashsize,
2, flashbase + flashsize, 2, flashsize);
qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
g_free(nodename);
} else {
/* Report the devices as separate nodes so we can mark one as
* only visible to the secure world.
*/
nodename = g_strdup_printf("/secflash@%" PRIx64, flashbase);
qemu_fdt_add_subnode(vbi->fdt, nodename);
qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
2, flashbase, 2, flashsize);
qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
g_free(nodename);
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
qemu_fdt_add_subnode(vbi->fdt, nodename);
qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
2, flashbase + flashsize, 2, flashsize);
qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
g_free(nodename);
}
}
static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as)
@@ -960,6 +1019,27 @@ static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
sysbus_mmio_get_region(s, 0));
}
static void create_secure_ram(VirtBoardInfo *vbi, MemoryRegion *secure_sysmem)
{
MemoryRegion *secram = g_new(MemoryRegion, 1);
char *nodename;
hwaddr base = vbi->memmap[VIRT_SECURE_MEM].base;
hwaddr size = vbi->memmap[VIRT_SECURE_MEM].size;
memory_region_init_ram(secram, NULL, "virt.secure-ram", size, &error_fatal);
vmstate_register_ram_global(secram);
memory_region_add_subregion(secure_sysmem, base, secram);
nodename = g_strdup_printf("/secram@%" PRIx64, base);
qemu_fdt_add_subnode(vbi->fdt, nodename);
qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "memory");
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", 2, base, 2, size);
qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
g_free(nodename);
}
static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
{
const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
@@ -1020,6 +1100,7 @@ static void machvirt_init(MachineState *machine)
VirtGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
VirtGuestInfo *guest_info = &guest_info_state->info;
char **cpustr;
bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
if (!cpu_model) {
cpu_model = "cortex-a15";
@@ -1047,6 +1128,15 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
/* If we have an EL3 boot ROM then the assumption is that it will
* implement PSCI itself, so disable QEMU's internal implementation
* so it doesn't get in the way. Instead of starting secondary
* CPUs in PSCI powerdown state we will start them all running and
* let the boot ROM sort them out.
* The usual case is that we do use QEMU's PSCI implementation.
*/
vbi->using_psci = !(vms->secure && firmware_loaded);
/* The maximum number of CPUs depends on the GIC version, or on how
* many redistributors we can fit into the memory map.
*/
@@ -1066,7 +1156,7 @@ static void machvirt_init(MachineState *machine)
vbi->smp_cpus = smp_cpus;
if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
error_report("mach-virt: cannot model more than 30GB RAM");
error_report("mach-virt: cannot model more than %dGB RAM", RAMLIMIT_GB);
exit(1);
}
@@ -1114,12 +1204,15 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, false, "has_el3", NULL);
}
object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_HVC, "psci-conduit",
NULL);
if (vbi->using_psci) {
object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_HVC,
"psci-conduit", NULL);
/* Secondary CPUs start in PSCI powered-down state */
if (n > 0) {
object_property_set_bool(cpuobj, true, "start-powered-off", NULL);
/* Secondary CPUs start in PSCI powered-down state */
if (n > 0) {
object_property_set_bool(cpuobj, true,
"start-powered-off", NULL);
}
}
if (object_property_find(cpuobj, "reset-cbar", NULL)) {
@@ -1145,13 +1238,14 @@ static void machvirt_init(MachineState *machine)
machine->ram_size);
memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
create_flash(vbi);
create_flash(vbi, sysmem, secure_sysmem ? secure_sysmem : sysmem);
create_gic(vbi, pic, gic_version, vms->secure);
create_uart(vbi, pic, VIRT_UART, sysmem);
if (vms->secure) {
create_secure_ram(vbi, secure_sysmem);
create_uart(vbi, pic, VIRT_SECURE_UART, secure_sysmem);
}
@@ -1187,7 +1281,7 @@ static void machvirt_init(MachineState *machine)
vbi->bootinfo.board_id = -1;
vbi->bootinfo.loader_start = vbi->memmap[VIRT_MEM].base;
vbi->bootinfo.get_dtb = machvirt_dtb;
vbi->bootinfo.firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
vbi->bootinfo.firmware_loaded = firmware_loaded;
arm_load_kernel(ARM_CPU(first_cpu), &vbi->bootinfo);
/*
@@ -1251,7 +1345,32 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp)
}
}
static void virt_instance_init(Object *obj)
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
mc->init = machvirt_init;
/* Start max_cpus at the maximum QEMU supports. We'll further restrict
* it later in machvirt_init, where we have more information about the
* configuration of the particular instance.
*/
mc->max_cpus = MAX_CPUMASK_BITS;
mc->has_dynamic_sysbus = true;
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
mc->pci_allow_0_address = true;
}
static const TypeInfo virt_machine_info = {
.name = TYPE_VIRT_MACHINE,
.parent = TYPE_MACHINE,
.abstract = true,
.instance_size = sizeof(VirtMachineState),
.class_size = sizeof(VirtMachineClass),
.class_init = virt_machine_class_init,
};
static void virt_2_6_instance_init(Object *obj)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -1284,35 +1403,29 @@ static void virt_instance_init(Object *obj)
"Valid values are 2, 3 and host", NULL);
}
static void virt_class_init(ObjectClass *oc, void *data)
static void virt_2_6_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
static GlobalProperty compat_props[] = {
{ /* end of list */ }
};
mc->desc = "ARM Virtual Machine",
mc->init = machvirt_init;
/* Start max_cpus at the maximum QEMU supports. We'll further restrict
* it later in machvirt_init, where we have more information about the
* configuration of the particular instance.
*/
mc->max_cpus = MAX_CPUMASK_BITS;
mc->has_dynamic_sysbus = true;
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
mc->pci_allow_0_address = true;
mc->desc = "QEMU 2.6 ARM Virtual Machine";
mc->alias = "virt";
mc->compat_props = compat_props;
}
static const TypeInfo machvirt_info = {
.name = TYPE_VIRT_MACHINE,
.parent = TYPE_MACHINE,
.instance_size = sizeof(VirtMachineState),
.instance_init = virt_instance_init,
.class_size = sizeof(VirtMachineClass),
.class_init = virt_class_init,
.name = MACHINE_TYPE_NAME("virt-2.6"),
.parent = TYPE_VIRT_MACHINE,
.instance_init = virt_2_6_instance_init,
.class_init = virt_2_6_class_init,
};
static void machvirt_machine_init(void)
{
type_register_static(&virt_machine_info);
type_register_static(&machvirt_info);
}
machine_init(machvirt_machine_init);
type_init(machvirt_machine_init);

View File

@@ -2557,6 +2557,29 @@ FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i)
return isa->state.drives[i].drive;
}
void isa_fdc_get_drive_max_chs(FloppyDriveType type,
uint8_t *maxc, uint8_t *maxh, uint8_t *maxs)
{
const FDFormat *fdf;
*maxc = *maxh = *maxs = 0;
for (fdf = fd_formats; fdf->drive != FLOPPY_DRIVE_TYPE_NONE; fdf++) {
if (fdf->drive != type) {
continue;
}
if (*maxc < fdf->max_track) {
*maxc = fdf->max_track;
}
if (*maxh < fdf->max_head) {
*maxh = fdf->max_head;
}
if (*maxs < fdf->last_sect) {
*maxs = fdf->last_sect;
}
}
(*maxc)--;
}
static const VMStateDescription vmstate_isa_fdc ={
.name = "fdc",
.version_id = 2,

View File

@@ -917,7 +917,7 @@ static int blk_connect(struct XenDevice *xendev)
/* setup via xenbus -> create new block driver instance */
xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
blkdev->blk = blk_new_open(blkdev->dev, blkdev->filename, NULL, options,
blkdev->blk = blk_new_open(blkdev->filename, NULL, options,
qflags, &local_err);
if (!blkdev->blk) {
xen_be_printf(&blkdev->xendev, 0, "error: %s\n",

View File

@@ -16,6 +16,7 @@ obj-$(CONFIG_SH4) += sh_serial.o
obj-$(CONFIG_PSERIES) += spapr_vty.o
obj-$(CONFIG_DIGIC) += digic-uart.o
obj-$(CONFIG_STM32F2XX_USART) += stm32f2xx_usart.o
obj-$(CONFIG_RASPI) += bcm2835_aux.o
common-obj-$(CONFIG_ETRAXFS) += etraxfs_ser.o
common-obj-$(CONFIG_ISA_DEBUG) += debugcon.o

316
hw/char/bcm2835_aux.c Normal file
View File

@@ -0,0 +1,316 @@
/*
* BCM2835 (Raspberry Pi / Pi 2) Aux block (mini UART and SPI).
* Copyright (c) 2015, Microsoft
* Written by Andrew Baumann
* Based on pl011.c, copyright terms below:
*
* Arm PrimeCell PL011 UART
*
* Copyright (c) 2006 CodeSourcery.
* Written by Paul Brook
*
* This code is licensed under the GPL.
*
* At present only the core UART functions (data path for tx/rx) are
* implemented. The following features/registers are unimplemented:
* - Line/modem control
* - Scratch register
* - Extra control
* - Baudrate
* - SPI interfaces
*/
#include "qemu/osdep.h"
#include "hw/char/bcm2835_aux.h"
#define AUX_IRQ 0x0
#define AUX_ENABLES 0x4
#define AUX_MU_IO_REG 0x40
#define AUX_MU_IER_REG 0x44
#define AUX_MU_IIR_REG 0x48
#define AUX_MU_LCR_REG 0x4c
#define AUX_MU_MCR_REG 0x50
#define AUX_MU_LSR_REG 0x54
#define AUX_MU_MSR_REG 0x58
#define AUX_MU_SCRATCH 0x5c
#define AUX_MU_CNTL_REG 0x60
#define AUX_MU_STAT_REG 0x64
#define AUX_MU_BAUD_REG 0x68
/* bits in IER/IIR registers */
#define TX_INT 0x1
#define RX_INT 0x2
static void bcm2835_aux_update(BCM2835AuxState *s)
{
/* signal an interrupt if either:
* 1. rx interrupt is enabled and we have a non-empty rx fifo, or
* 2. the tx interrupt is enabled (since we instantly drain the tx fifo)
*/
s->iir = 0;
if ((s->ier & RX_INT) && s->read_count != 0) {
s->iir |= RX_INT;
}
if (s->ier & TX_INT) {
s->iir |= TX_INT;
}
qemu_set_irq(s->irq, s->iir != 0);
}
static uint64_t bcm2835_aux_read(void *opaque, hwaddr offset, unsigned size)
{
BCM2835AuxState *s = opaque;
uint32_t c, res;
switch (offset) {
case AUX_IRQ:
return s->iir != 0;
case AUX_ENABLES:
return 1; /* mini UART permanently enabled */
case AUX_MU_IO_REG:
/* "DLAB bit set means access baudrate register" is NYI */
c = s->read_fifo[s->read_pos];
if (s->read_count > 0) {
s->read_count--;
if (++s->read_pos == BCM2835_AUX_RX_FIFO_LEN) {
s->read_pos = 0;
}
}
if (s->chr) {
qemu_chr_accept_input(s->chr);
}
bcm2835_aux_update(s);
return c;
case AUX_MU_IER_REG:
/* "DLAB bit set means access baudrate register" is NYI */
return 0xc0 | s->ier; /* FIFO enables always read 1 */
case AUX_MU_IIR_REG:
res = 0xc0; /* FIFO enables */
/* The spec is unclear on what happens when both tx and rx
* interrupts are active, besides that this cannot occur. At
* present, we choose to prioritise the rx interrupt, since
* the tx fifo is always empty. */
if (s->read_count != 0) {
res |= 0x4;
} else {
res |= 0x2;
}
if (s->iir == 0) {
res |= 0x1;
}
return res;
case AUX_MU_LCR_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_LCR_REG unsupported\n", __func__);
return 0;
case AUX_MU_MCR_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_MCR_REG unsupported\n", __func__);
return 0;
case AUX_MU_LSR_REG:
res = 0x60; /* tx idle, empty */
if (s->read_count != 0) {
res |= 0x1;
}
return res;
case AUX_MU_MSR_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_MSR_REG unsupported\n", __func__);
return 0;
case AUX_MU_SCRATCH:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_SCRATCH unsupported\n", __func__);
return 0;
case AUX_MU_CNTL_REG:
return 0x3; /* tx, rx enabled */
case AUX_MU_STAT_REG:
res = 0x30e; /* space in the output buffer, empty tx fifo, idle tx/rx */
if (s->read_count > 0) {
res |= 0x1; /* data in input buffer */
assert(s->read_count < BCM2835_AUX_RX_FIFO_LEN);
res |= ((uint32_t)s->read_count) << 16; /* rx fifo fill level */
}
return res;
case AUX_MU_BAUD_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_BAUD_REG unsupported\n", __func__);
return 0;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
return 0;
}
}
static void bcm2835_aux_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
BCM2835AuxState *s = opaque;
unsigned char ch;
switch (offset) {
case AUX_ENABLES:
if (value != 1) {
qemu_log_mask(LOG_UNIMP, "%s: unsupported attempt to enable SPI "
"or disable UART\n", __func__);
}
break;
case AUX_MU_IO_REG:
/* "DLAB bit set means access baudrate register" is NYI */
ch = value;
if (s->chr) {
qemu_chr_fe_write(s->chr, &ch, 1);
}
break;
case AUX_MU_IER_REG:
/* "DLAB bit set means access baudrate register" is NYI */
s->ier = value & (TX_INT | RX_INT);
bcm2835_aux_update(s);
break;
case AUX_MU_IIR_REG:
if (value & 0x2) {
s->read_count = 0;
}
break;
case AUX_MU_LCR_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_LCR_REG unsupported\n", __func__);
break;
case AUX_MU_MCR_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_MCR_REG unsupported\n", __func__);
break;
case AUX_MU_SCRATCH:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_SCRATCH unsupported\n", __func__);
break;
case AUX_MU_CNTL_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_CNTL_REG unsupported\n", __func__);
break;
case AUX_MU_BAUD_REG:
qemu_log_mask(LOG_UNIMP, "%s: AUX_MU_BAUD_REG unsupported\n", __func__);
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
}
bcm2835_aux_update(s);
}
static int bcm2835_aux_can_receive(void *opaque)
{
BCM2835AuxState *s = opaque;
return s->read_count < BCM2835_AUX_RX_FIFO_LEN;
}
static void bcm2835_aux_put_fifo(void *opaque, uint8_t value)
{
BCM2835AuxState *s = opaque;
int slot;
slot = s->read_pos + s->read_count;
if (slot >= BCM2835_AUX_RX_FIFO_LEN) {
slot -= BCM2835_AUX_RX_FIFO_LEN;
}
s->read_fifo[slot] = value;
s->read_count++;
if (s->read_count == BCM2835_AUX_RX_FIFO_LEN) {
/* buffer full */
}
bcm2835_aux_update(s);
}
static void bcm2835_aux_receive(void *opaque, const uint8_t *buf, int size)
{
bcm2835_aux_put_fifo(opaque, *buf);
}
static const MemoryRegionOps bcm2835_aux_ops = {
.read = bcm2835_aux_read,
.write = bcm2835_aux_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.valid.min_access_size = 4,
.valid.max_access_size = 4,
};
static const VMStateDescription vmstate_bcm2835_aux = {
.name = TYPE_BCM2835_AUX,
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT8_ARRAY(read_fifo, BCM2835AuxState,
BCM2835_AUX_RX_FIFO_LEN),
VMSTATE_UINT8(read_pos, BCM2835AuxState),
VMSTATE_UINT8(read_count, BCM2835AuxState),
VMSTATE_UINT8(ier, BCM2835AuxState),
VMSTATE_UINT8(iir, BCM2835AuxState),
VMSTATE_END_OF_LIST()
}
};
static void bcm2835_aux_init(Object *obj)
{
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
BCM2835AuxState *s = BCM2835_AUX(obj);
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_aux_ops, s,
TYPE_BCM2835_AUX, 0x100);
sysbus_init_mmio(sbd, &s->iomem);
sysbus_init_irq(sbd, &s->irq);
}
static void bcm2835_aux_realize(DeviceState *dev, Error **errp)
{
BCM2835AuxState *s = BCM2835_AUX(dev);
if (s->chr) {
qemu_chr_add_handlers(s->chr, bcm2835_aux_can_receive,
bcm2835_aux_receive, NULL, s);
}
}
static Property bcm2835_aux_props[] = {
DEFINE_PROP_CHR("chardev", BCM2835AuxState, chr),
DEFINE_PROP_END_OF_LIST(),
};
static void bcm2835_aux_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
dc->realize = bcm2835_aux_realize;
dc->vmsd = &vmstate_bcm2835_aux;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
dc->props = bcm2835_aux_props;
}
static const TypeInfo bcm2835_aux_info = {
.name = TYPE_BCM2835_AUX,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(BCM2835AuxState),
.instance_init = bcm2835_aux_init,
.class_init = bcm2835_aux_class_init,
};
static void bcm2835_aux_register_types(void)
{
type_register_static(&bcm2835_aux_info);
}
type_init(bcm2835_aux_register_types)

View File

@@ -842,14 +842,16 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src,
{
ChannelState *s = (ChannelState *)dev;
int qcode, keycode;
InputKeyEvent *key;
assert(evt->type == INPUT_EVENT_KIND_KEY);
qcode = qemu_input_key_value_to_qcode(evt->u.key->key);
key = evt->u.key;
qcode = qemu_input_key_value_to_qcode(key->key);
trace_escc_sunkbd_event_in(qcode, QKeyCode_lookup[qcode],
evt->u.key->down);
key->down);
if (qcode == Q_KEY_CODE_CAPS_LOCK) {
if (evt->u.key->down) {
if (key->down) {
s->caps_lock_mode ^= 1;
if (s->caps_lock_mode == 2) {
return; /* Drop second press */
@@ -863,7 +865,7 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src,
}
if (qcode == Q_KEY_CODE_NUM_LOCK) {
if (evt->u.key->down) {
if (key->down) {
s->num_lock_mode ^= 1;
if (s->num_lock_mode == 2) {
return; /* Drop second press */
@@ -877,7 +879,7 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src,
}
keycode = qcode_to_keycode[qcode];
if (!evt->u.key->down) {
if (!key->down) {
keycode |= 0x80;
}
trace_escc_sunkbd_event_out(keycode);

View File

@@ -147,6 +147,28 @@ int load_image_targphys(const char *filename,
return size;
}
int load_image_mr(const char *filename, MemoryRegion *mr)
{
int size;
if (!memory_access_is_direct(mr, false)) {
/* Can only load an image into RAM or ROM */
return -1;
}
size = get_image_size(filename);
if (size > memory_region_size(mr)) {
return -1;
}
if (size > 0) {
if (rom_add_file_mr(filename, mr, -1) < 0) {
return -1;
}
}
return size;
}
void pstrcpy_targphys(const char *name, hwaddr dest, int buf_size,
const char *source)
{
@@ -332,10 +354,66 @@ const char *load_elf_strerror(int error)
}
}
void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp)
{
int fd;
uint8_t e_ident_local[EI_NIDENT];
uint8_t *e_ident;
size_t hdr_size, off;
bool is64l;
if (!hdr) {
hdr = e_ident_local;
}
e_ident = hdr;
fd = open(filename, O_RDONLY | O_BINARY);
if (fd < 0) {
error_setg_errno(errp, errno, "Failed to open file: %s", filename);
return;
}
if (read(fd, hdr, EI_NIDENT) != EI_NIDENT) {
error_setg_errno(errp, errno, "Failed to read file: %s", filename);
goto fail;
}
if (e_ident[0] != ELFMAG0 ||
e_ident[1] != ELFMAG1 ||
e_ident[2] != ELFMAG2 ||
e_ident[3] != ELFMAG3) {
error_setg(errp, "Bad ELF magic");
goto fail;
}
is64l = e_ident[EI_CLASS] == ELFCLASS64;
hdr_size = is64l ? sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr);
if (is64) {
*is64 = is64l;
}
off = EI_NIDENT;
while (hdr != e_ident_local && off < hdr_size) {
size_t br = read(fd, hdr + off, hdr_size - off);
switch (br) {
case 0:
error_setg(errp, "File too short: %s", filename);
goto fail;
case -1:
error_setg_errno(errp, errno, "Failed to read file: %s",
filename);
goto fail;
}
off += br;
}
fail:
close(fd);
}
/* return < 0 if error, otherwise the number of bytes loaded in memory */
int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb)
uint64_t *highaddr, int big_endian, int elf_machine,
int clear_lsb, int data_swab)
{
int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED;
uint8_t e_ident[EI_NIDENT];
@@ -374,10 +452,12 @@ int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t),
lseek(fd, 0, SEEK_SET);
if (e_ident[EI_CLASS] == ELFCLASS64) {
ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb);
pentry, lowaddr, highaddr, elf_machine, clear_lsb,
data_swab);
} else {
ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb);
pentry, lowaddr, highaddr, elf_machine, clear_lsb,
data_swab);
}
fail:
@@ -751,7 +831,7 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name)
int rom_add_file(const char *file, const char *fw_dir,
hwaddr addr, int32_t bootindex,
bool option_rom)
bool option_rom, MemoryRegion *mr)
{
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
Rom *rom;
@@ -818,7 +898,12 @@ int rom_add_file(const char *file, const char *fw_dir,
fw_cfg_add_file(fw_cfg, fw_file_name, data, rom->romsize);
} else {
snprintf(devpath, sizeof(devpath), "/rom@" TARGET_FMT_plx, addr);
if (mr) {
rom->mr = mr;
snprintf(devpath, sizeof(devpath), "/rom@%s", file);
} else {
snprintf(devpath, sizeof(devpath), "/rom@" TARGET_FMT_plx, addr);
}
}
add_boot_device_path(bootindex, NULL, devpath);
@@ -892,12 +977,12 @@ int rom_add_elf_program(const char *name, void *data, size_t datasize,
int rom_add_vga(const char *file)
{
return rom_add_file(file, "vgaroms", 0, -1, true);
return rom_add_file(file, "vgaroms", 0, -1, true, NULL);
}
int rom_add_option(const char *file, int32_t bootindex)
{
return rom_add_file(file, "genroms", 0, bootindex, true);
return rom_add_file(file, "genroms", 0, bootindex, true, NULL);
}
static void rom_reset(void *unused)

View File

@@ -109,7 +109,7 @@ static void a15mp_priv_realize(DeviceState *dev, Error **errp)
/* Memory map (addresses are offsets from PERIPHBASE):
* 0x0000-0x0fff -- reserved
* 0x1000-0x1fff -- GIC Distributor
* 0x2000-0x2fff -- GIC CPU interface
* 0x2000-0x3fff -- GIC CPU interface
* 0x4000-0x4fff -- GIC virtual interface control (not modelled)
* 0x5000-0x5fff -- GIC virtual interface control (not modelled)
* 0x6000-0x7fff -- GIC virtual CPU interface (not modelled)

View File

@@ -73,7 +73,7 @@ void cris_load_image(CRISCPU *cpu, struct cris_load_info *li)
/* Boots a kernel elf binary, os/linux-2.6/vmlinux from the axis
devboard SDK. */
image_size = load_elf(li->image_filename, translate_kernel_address, NULL,
&entry, NULL, &high, 0, EM_CRIS, 0);
&entry, NULL, &high, 0, EM_CRIS, 0, 0);
li->entry = entry;
if (image_size < 0) {
/* Takes a kimage from the axis devboard SDK. */

View File

@@ -27,6 +27,7 @@ endif
obj-$(CONFIG_OMAP) += omap_dss.o
obj-$(CONFIG_OMAP) += omap_lcdc.o
obj-$(CONFIG_PXA2XX) += pxa2xx_lcd.o
obj-$(CONFIG_RASPI) += bcm2835_fb.o
obj-$(CONFIG_SM501) += sm501.o
obj-$(CONFIG_TCX) += tcx.o
obj-$(CONFIG_CG3) += cg3.o

424
hw/display/bcm2835_fb.c Normal file
View File

@@ -0,0 +1,424 @@
/*
* Raspberry Pi emulation (c) 2012 Gregory Estrade
* Refactoring for Pi2 Copyright (c) 2015, Microsoft. Written by Andrew Baumann.
* This code is licensed under the GNU GPLv2 and later.
*
* Heavily based on milkymist-vgafb.c, copyright terms below:
* QEMU model of the Milkymist VGA framebuffer.
*
* Copyright (c) 2010-2012 Michael Walle <michael@walle.cc>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
#include "qemu/osdep.h"
#include "hw/display/bcm2835_fb.h"
#include "hw/display/framebuffer.h"
#include "ui/pixel_ops.h"
#include "hw/misc/bcm2835_mbox_defs.h"
#define DEFAULT_VCRAM_SIZE 0x4000000
#define BCM2835_FB_OFFSET 0x00100000
static void fb_invalidate_display(void *opaque)
{
BCM2835FBState *s = BCM2835_FB(opaque);
s->invalidate = true;
}
static void draw_line_src16(void *opaque, uint8_t *dst, const uint8_t *src,
int width, int deststep)
{
BCM2835FBState *s = opaque;
uint16_t rgb565;
uint32_t rgb888;
uint8_t r, g, b;
DisplaySurface *surface = qemu_console_surface(s->con);
int bpp = surface_bits_per_pixel(surface);
while (width--) {
switch (s->bpp) {
case 8:
/* lookup palette starting at video ram base
* TODO: cache translation, rather than doing this each time!
*/
rgb888 = ldl_le_phys(&s->dma_as, s->vcram_base + (*src << 2));
r = (rgb888 >> 0) & 0xff;
g = (rgb888 >> 8) & 0xff;
b = (rgb888 >> 16) & 0xff;
src++;
break;
case 16:
rgb565 = lduw_le_p(src);
r = ((rgb565 >> 11) & 0x1f) << 3;
g = ((rgb565 >> 5) & 0x3f) << 2;
b = ((rgb565 >> 0) & 0x1f) << 3;
src += 2;
break;
case 24:
rgb888 = ldl_le_p(src);
r = (rgb888 >> 0) & 0xff;
g = (rgb888 >> 8) & 0xff;
b = (rgb888 >> 16) & 0xff;
src += 3;
break;
case 32:
rgb888 = ldl_le_p(src);
r = (rgb888 >> 0) & 0xff;
g = (rgb888 >> 8) & 0xff;
b = (rgb888 >> 16) & 0xff;
src += 4;
break;
default:
r = 0;
g = 0;
b = 0;
break;
}
if (s->pixo == 0) {
/* swap to BGR pixel format */
uint8_t tmp = r;
r = b;
b = tmp;
}
switch (bpp) {
case 8:
*dst++ = rgb_to_pixel8(r, g, b);
break;
case 15:
*(uint16_t *)dst = rgb_to_pixel15(r, g, b);
dst += 2;
break;
case 16:
*(uint16_t *)dst = rgb_to_pixel16(r, g, b);
dst += 2;
break;
case 24:
rgb888 = rgb_to_pixel24(r, g, b);
*dst++ = rgb888 & 0xff;
*dst++ = (rgb888 >> 8) & 0xff;
*dst++ = (rgb888 >> 16) & 0xff;
break;
case 32:
*(uint32_t *)dst = rgb_to_pixel32(r, g, b);
dst += 4;
break;
default:
return;
}
}
}
static void fb_update_display(void *opaque)
{
BCM2835FBState *s = opaque;
DisplaySurface *surface = qemu_console_surface(s->con);
int first = 0;
int last = 0;
int src_width = 0;
int dest_width = 0;
if (s->lock || !s->xres) {
return;
}
src_width = s->xres * (s->bpp >> 3);
dest_width = s->xres;
switch (surface_bits_per_pixel(surface)) {
case 0:
return;
case 8:
break;
case 15:
dest_width *= 2;
break;
case 16:
dest_width *= 2;
break;
case 24:
dest_width *= 3;
break;
case 32:
dest_width *= 4;
break;
default:
hw_error("bcm2835_fb: bad color depth\n");
break;
}
if (s->invalidate) {
framebuffer_update_memory_section(&s->fbsection, s->dma_mr, s->base,
s->yres, src_width);
}
framebuffer_update_display(surface, &s->fbsection, s->xres, s->yres,
src_width, dest_width, 0, s->invalidate,
draw_line_src16, s, &first, &last);
if (first >= 0) {
dpy_gfx_update(s->con, 0, first, s->xres, last - first + 1);
}
s->invalidate = false;
}
static void bcm2835_fb_mbox_push(BCM2835FBState *s, uint32_t value)
{
value &= ~0xf;
s->lock = true;
s->xres = ldl_le_phys(&s->dma_as, value);
s->yres = ldl_le_phys(&s->dma_as, value + 4);
s->xres_virtual = ldl_le_phys(&s->dma_as, value + 8);
s->yres_virtual = ldl_le_phys(&s->dma_as, value + 12);
s->bpp = ldl_le_phys(&s->dma_as, value + 20);
s->xoffset = ldl_le_phys(&s->dma_as, value + 24);
s->yoffset = ldl_le_phys(&s->dma_as, value + 28);
s->base = s->vcram_base | (value & 0xc0000000);
s->base += BCM2835_FB_OFFSET;
/* TODO - Manage properly virtual resolution */
s->pitch = s->xres * (s->bpp >> 3);
s->size = s->yres * s->pitch;
stl_le_phys(&s->dma_as, value + 16, s->pitch);
stl_le_phys(&s->dma_as, value + 32, s->base);
stl_le_phys(&s->dma_as, value + 36, s->size);
s->invalidate = true;
qemu_console_resize(s->con, s->xres, s->yres);
s->lock = false;
}
void bcm2835_fb_reconfigure(BCM2835FBState *s, uint32_t *xres, uint32_t *yres,
uint32_t *xoffset, uint32_t *yoffset, uint32_t *bpp,
uint32_t *pixo, uint32_t *alpha)
{
s->lock = true;
/* TODO: input validation! */
if (xres) {
s->xres = *xres;
}
if (yres) {
s->yres = *yres;
}
if (xoffset) {
s->xoffset = *xoffset;
}
if (yoffset) {
s->yoffset = *yoffset;
}
if (bpp) {
s->bpp = *bpp;
}
if (pixo) {
s->pixo = *pixo;
}
if (alpha) {
s->alpha = *alpha;
}
/* TODO - Manage properly virtual resolution */
s->pitch = s->xres * (s->bpp >> 3);
s->size = s->yres * s->pitch;
s->invalidate = true;
qemu_console_resize(s->con, s->xres, s->yres);
s->lock = false;
}
static uint64_t bcm2835_fb_read(void *opaque, hwaddr offset, unsigned size)
{
BCM2835FBState *s = opaque;
uint32_t res = 0;
switch (offset) {
case MBOX_AS_DATA:
res = MBOX_CHAN_FB;
s->pending = false;
qemu_set_irq(s->mbox_irq, 0);
break;
case MBOX_AS_PENDING:
res = s->pending;
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
return 0;
}
return res;
}
static void bcm2835_fb_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
BCM2835FBState *s = opaque;
switch (offset) {
case MBOX_AS_DATA:
/* bcm2835_mbox should check our pending status before pushing */
assert(!s->pending);
s->pending = true;
bcm2835_fb_mbox_push(s, value);
qemu_set_irq(s->mbox_irq, 1);
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
return;
}
}
static const MemoryRegionOps bcm2835_fb_ops = {
.read = bcm2835_fb_read,
.write = bcm2835_fb_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.valid.min_access_size = 4,
.valid.max_access_size = 4,
};
static const VMStateDescription vmstate_bcm2835_fb = {
.name = TYPE_BCM2835_FB,
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_BOOL(lock, BCM2835FBState),
VMSTATE_BOOL(invalidate, BCM2835FBState),
VMSTATE_BOOL(pending, BCM2835FBState),
VMSTATE_UINT32(xres, BCM2835FBState),
VMSTATE_UINT32(yres, BCM2835FBState),
VMSTATE_UINT32(xres_virtual, BCM2835FBState),
VMSTATE_UINT32(yres_virtual, BCM2835FBState),
VMSTATE_UINT32(xoffset, BCM2835FBState),
VMSTATE_UINT32(yoffset, BCM2835FBState),
VMSTATE_UINT32(bpp, BCM2835FBState),
VMSTATE_UINT32(base, BCM2835FBState),
VMSTATE_UINT32(pitch, BCM2835FBState),
VMSTATE_UINT32(size, BCM2835FBState),
VMSTATE_UINT32(pixo, BCM2835FBState),
VMSTATE_UINT32(alpha, BCM2835FBState),
VMSTATE_END_OF_LIST()
}
};
static const GraphicHwOps vgafb_ops = {
.invalidate = fb_invalidate_display,
.gfx_update = fb_update_display,
};
static void bcm2835_fb_init(Object *obj)
{
BCM2835FBState *s = BCM2835_FB(obj);
memory_region_init_io(&s->iomem, obj, &bcm2835_fb_ops, s, TYPE_BCM2835_FB,
0x10);
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
}
static void bcm2835_fb_reset(DeviceState *dev)
{
BCM2835FBState *s = BCM2835_FB(dev);
s->pending = false;
s->xres_virtual = s->xres;
s->yres_virtual = s->yres;
s->xoffset = 0;
s->yoffset = 0;
s->base = s->vcram_base + BCM2835_FB_OFFSET;
s->pitch = s->xres * (s->bpp >> 3);
s->size = s->yres * s->pitch;
s->invalidate = true;
s->lock = false;
}
static void bcm2835_fb_realize(DeviceState *dev, Error **errp)
{
BCM2835FBState *s = BCM2835_FB(dev);
Error *err = NULL;
Object *obj;
if (s->vcram_base == 0) {
error_setg(errp, "%s: required vcram-base property not set", __func__);
return;
}
obj = object_property_get_link(OBJECT(dev), "dma-mr", &err);
if (obj == NULL) {
error_setg(errp, "%s: required dma-mr link not found: %s",
__func__, error_get_pretty(err));
return;
}
s->dma_mr = MEMORY_REGION(obj);
address_space_init(&s->dma_as, s->dma_mr, NULL);
bcm2835_fb_reset(dev);
s->con = graphic_console_init(dev, 0, &vgafb_ops, s);
qemu_console_resize(s->con, s->xres, s->yres);
}
static Property bcm2835_fb_props[] = {
DEFINE_PROP_UINT32("vcram-base", BCM2835FBState, vcram_base, 0),/*required*/
DEFINE_PROP_UINT32("vcram-size", BCM2835FBState, vcram_size,
DEFAULT_VCRAM_SIZE),
DEFINE_PROP_UINT32("xres", BCM2835FBState, xres, 640),
DEFINE_PROP_UINT32("yres", BCM2835FBState, yres, 480),
DEFINE_PROP_UINT32("bpp", BCM2835FBState, bpp, 16),
DEFINE_PROP_UINT32("pixo", BCM2835FBState, pixo, 1), /* 1=RGB, 0=BGR */
DEFINE_PROP_UINT32("alpha", BCM2835FBState, alpha, 2), /* alpha ignored */
DEFINE_PROP_END_OF_LIST()
};
static void bcm2835_fb_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->props = bcm2835_fb_props;
dc->realize = bcm2835_fb_realize;
dc->reset = bcm2835_fb_reset;
dc->vmsd = &vmstate_bcm2835_fb;
}
static TypeInfo bcm2835_fb_info = {
.name = TYPE_BCM2835_FB,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(BCM2835FBState),
.class_init = bcm2835_fb_class_init,
.instance_init = bcm2835_fb_init,
};
static void bcm2835_fb_register_types(void)
{
type_register_static(&bcm2835_fb_info);
}
type_init(bcm2835_fb_register_types)

View File

@@ -276,14 +276,14 @@ static bool blit_region_is_unsafe(struct CirrusVGAState *s,
+ ((int64_t)s->cirrus_blt_height-1) * pitch;
int32_t max = addr
+ s->cirrus_blt_width;
if (min < 0 || max >= s->vga.vram_size) {
if (min < 0 || max > s->vga.vram_size) {
return true;
}
} else {
int64_t max = addr
+ ((int64_t)s->cirrus_blt_height-1) * pitch
+ s->cirrus_blt_width;
if (max >= s->vga.vram_size) {
if (max > s->vga.vram_size) {
return true;
}
}

View File

@@ -1156,7 +1156,9 @@ static void qxl_soft_reset(PCIQXLDevice *d)
trace_qxl_soft_reset(d->id);
qxl_check_state(d);
qxl_clear_guest_bug(d);
qemu_mutex_lock(&d->async_lock);
d->current_async = QXL_UNDEFINED_IO;
qemu_mutex_unlock(&d->async_lock);
if (d->id == 0) {
qxl_enter_vga_mode(d);

View File

@@ -11,3 +11,4 @@ common-obj-$(CONFIG_SUN4M) += sun4m_iommu.o
obj-$(CONFIG_OMAP) += omap_dma.o soc_dma.o
obj-$(CONFIG_PXA2XX) += pxa2xx_dma.o
obj-$(CONFIG_RASPI) += bcm2835_dma.o

408
hw/dma/bcm2835_dma.c Normal file
View File

@@ -0,0 +1,408 @@
/*
* Raspberry Pi emulation (c) 2012 Gregory Estrade
* This code is licensed under the GNU GPLv2 and later.
*/
#include "qemu/osdep.h"
#include "hw/dma/bcm2835_dma.h"
/* DMA CS Control and Status bits */
#define BCM2708_DMA_ACTIVE (1 << 0)
#define BCM2708_DMA_END (1 << 1) /* GE */
#define BCM2708_DMA_INT (1 << 2)
#define BCM2708_DMA_ISPAUSED (1 << 4) /* Pause requested or not active */
#define BCM2708_DMA_ISHELD (1 << 5) /* Is held by DREQ flow control */
#define BCM2708_DMA_ERR (1 << 8)
#define BCM2708_DMA_ABORT (1 << 30) /* stop current CB, go to next, WO */
#define BCM2708_DMA_RESET (1 << 31) /* WO, self clearing */
/* DMA control block "info" field bits */
#define BCM2708_DMA_INT_EN (1 << 0)
#define BCM2708_DMA_TDMODE (1 << 1)
#define BCM2708_DMA_WAIT_RESP (1 << 3)
#define BCM2708_DMA_D_INC (1 << 4)
#define BCM2708_DMA_D_WIDTH (1 << 5)
#define BCM2708_DMA_D_DREQ (1 << 6)
#define BCM2708_DMA_D_IGNORE (1 << 7)
#define BCM2708_DMA_S_INC (1 << 8)
#define BCM2708_DMA_S_WIDTH (1 << 9)
#define BCM2708_DMA_S_DREQ (1 << 10)
#define BCM2708_DMA_S_IGNORE (1 << 11)
/* Register offsets */
#define BCM2708_DMA_CS 0x00 /* Control and Status */
#define BCM2708_DMA_ADDR 0x04 /* Control block address */
/* the current control block appears in the following registers - read only */
#define BCM2708_DMA_INFO 0x08
#define BCM2708_DMA_SOURCE_AD 0x0c
#define BCM2708_DMA_DEST_AD 0x10
#define BCM2708_DMA_TXFR_LEN 0x14
#define BCM2708_DMA_STRIDE 0x18
#define BCM2708_DMA_NEXTCB 0x1C
#define BCM2708_DMA_DEBUG 0x20
#define BCM2708_DMA_INT_STATUS 0xfe0 /* Interrupt status of each channel */
#define BCM2708_DMA_ENABLE 0xff0 /* Global enable bits for each channel */
#define BCM2708_DMA_CS_RW_MASK 0x30ff0001 /* All RW bits in DMA_CS */
static void bcm2835_dma_update(BCM2835DMAState *s, unsigned c)
{
BCM2835DMAChan *ch = &s->chan[c];
uint32_t data, xlen, ylen;
int16_t dst_stride, src_stride;
if (!(s->enable & (1 << c))) {
return;
}
while ((s->enable & (1 << c)) && (ch->conblk_ad != 0)) {
/* CB fetch */
ch->ti = ldl_le_phys(&s->dma_as, ch->conblk_ad);
ch->source_ad = ldl_le_phys(&s->dma_as, ch->conblk_ad + 4);
ch->dest_ad = ldl_le_phys(&s->dma_as, ch->conblk_ad + 8);
ch->txfr_len = ldl_le_phys(&s->dma_as, ch->conblk_ad + 12);
ch->stride = ldl_le_phys(&s->dma_as, ch->conblk_ad + 16);
ch->nextconbk = ldl_le_phys(&s->dma_as, ch->conblk_ad + 20);
if (ch->ti & BCM2708_DMA_TDMODE) {
/* 2D transfer mode */
ylen = (ch->txfr_len >> 16) & 0x3fff;
xlen = ch->txfr_len & 0xffff;
dst_stride = ch->stride >> 16;
src_stride = ch->stride & 0xffff;
} else {
ylen = 1;
xlen = ch->txfr_len;
dst_stride = 0;
src_stride = 0;
}
while (ylen != 0) {
/* Normal transfer mode */
while (xlen != 0) {
if (ch->ti & BCM2708_DMA_S_IGNORE) {
/* Ignore reads */
data = 0;
} else {
data = ldl_le_phys(&s->dma_as, ch->source_ad);
}
if (ch->ti & BCM2708_DMA_S_INC) {
ch->source_ad += 4;
}
if (ch->ti & BCM2708_DMA_D_IGNORE) {
/* Ignore writes */
} else {
stl_le_phys(&s->dma_as, ch->dest_ad, data);
}
if (ch->ti & BCM2708_DMA_D_INC) {
ch->dest_ad += 4;
}
/* update remaining transfer length */
xlen -= 4;
if (ch->ti & BCM2708_DMA_TDMODE) {
ch->txfr_len = (ylen << 16) | xlen;
} else {
ch->txfr_len = xlen;
}
}
if (--ylen != 0) {
ch->source_ad += src_stride;
ch->dest_ad += dst_stride;
}
}
ch->cs |= BCM2708_DMA_END;
if (ch->ti & BCM2708_DMA_INT_EN) {
ch->cs |= BCM2708_DMA_INT;
s->int_status |= (1 << c);
qemu_set_irq(ch->irq, 1);
}
/* Process next CB */
ch->conblk_ad = ch->nextconbk;
}
ch->cs &= ~BCM2708_DMA_ACTIVE;
ch->cs |= BCM2708_DMA_ISPAUSED;
}
static void bcm2835_dma_chan_reset(BCM2835DMAChan *ch)
{
ch->cs = 0;
ch->conblk_ad = 0;
}
static uint64_t bcm2835_dma_read(BCM2835DMAState *s, hwaddr offset,
unsigned size, unsigned c)
{
BCM2835DMAChan *ch;
uint32_t res = 0;
assert(size == 4);
assert(c < BCM2835_DMA_NCHANS);
ch = &s->chan[c];
switch (offset) {
case BCM2708_DMA_CS:
res = ch->cs;
break;
case BCM2708_DMA_ADDR:
res = ch->conblk_ad;
break;
case BCM2708_DMA_INFO:
res = ch->ti;
break;
case BCM2708_DMA_SOURCE_AD:
res = ch->source_ad;
break;
case BCM2708_DMA_DEST_AD:
res = ch->dest_ad;
break;
case BCM2708_DMA_TXFR_LEN:
res = ch->txfr_len;
break;
case BCM2708_DMA_STRIDE:
res = ch->stride;
break;
case BCM2708_DMA_NEXTCB:
res = ch->nextconbk;
break;
case BCM2708_DMA_DEBUG:
res = ch->debug;
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
break;
}
return res;
}
static void bcm2835_dma_write(BCM2835DMAState *s, hwaddr offset,
uint64_t value, unsigned size, unsigned c)
{
BCM2835DMAChan *ch;
uint32_t oldcs;
assert(size == 4);
assert(c < BCM2835_DMA_NCHANS);
ch = &s->chan[c];
switch (offset) {
case BCM2708_DMA_CS:
oldcs = ch->cs;
if (value & BCM2708_DMA_RESET) {
bcm2835_dma_chan_reset(ch);
}
if (value & BCM2708_DMA_ABORT) {
/* abort is a no-op, since we always run to completion */
}
if (value & BCM2708_DMA_END) {
ch->cs &= ~BCM2708_DMA_END;
}
if (value & BCM2708_DMA_INT) {
ch->cs &= ~BCM2708_DMA_INT;
s->int_status &= ~(1 << c);
qemu_set_irq(ch->irq, 0);
}
ch->cs &= ~BCM2708_DMA_CS_RW_MASK;
ch->cs |= (value & BCM2708_DMA_CS_RW_MASK);
if (!(oldcs & BCM2708_DMA_ACTIVE) && (ch->cs & BCM2708_DMA_ACTIVE)) {
bcm2835_dma_update(s, c);
}
break;
case BCM2708_DMA_ADDR:
ch->conblk_ad = value;
break;
case BCM2708_DMA_DEBUG:
ch->debug = value;
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
break;
}
}
static uint64_t bcm2835_dma0_read(void *opaque, hwaddr offset, unsigned size)
{
BCM2835DMAState *s = opaque;
if (offset < 0xf00) {
return bcm2835_dma_read(s, (offset & 0xff), size, (offset >> 8) & 0xf);
} else {
switch (offset) {
case BCM2708_DMA_INT_STATUS:
return s->int_status;
case BCM2708_DMA_ENABLE:
return s->enable;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
return 0;
}
}
}
static uint64_t bcm2835_dma15_read(void *opaque, hwaddr offset, unsigned size)
{
return bcm2835_dma_read(opaque, (offset & 0xff), size, 15);
}
static void bcm2835_dma0_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
BCM2835DMAState *s = opaque;
if (offset < 0xf00) {
bcm2835_dma_write(s, (offset & 0xff), value, size, (offset >> 8) & 0xf);
} else {
switch (offset) {
case BCM2708_DMA_INT_STATUS:
break;
case BCM2708_DMA_ENABLE:
s->enable = (value & 0xffff);
break;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
}
}
}
static void bcm2835_dma15_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
bcm2835_dma_write(opaque, (offset & 0xff), value, size, 15);
}
static const MemoryRegionOps bcm2835_dma0_ops = {
.read = bcm2835_dma0_read,
.write = bcm2835_dma0_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.valid.min_access_size = 4,
.valid.max_access_size = 4,
};
static const MemoryRegionOps bcm2835_dma15_ops = {
.read = bcm2835_dma15_read,
.write = bcm2835_dma15_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.valid.min_access_size = 4,
.valid.max_access_size = 4,
};
static const VMStateDescription vmstate_bcm2835_dma_chan = {
.name = TYPE_BCM2835_DMA "-chan",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(cs, BCM2835DMAChan),
VMSTATE_UINT32(conblk_ad, BCM2835DMAChan),
VMSTATE_UINT32(ti, BCM2835DMAChan),
VMSTATE_UINT32(source_ad, BCM2835DMAChan),
VMSTATE_UINT32(dest_ad, BCM2835DMAChan),
VMSTATE_UINT32(txfr_len, BCM2835DMAChan),
VMSTATE_UINT32(stride, BCM2835DMAChan),
VMSTATE_UINT32(nextconbk, BCM2835DMAChan),
VMSTATE_UINT32(debug, BCM2835DMAChan),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_bcm2835_dma = {
.name = TYPE_BCM2835_DMA,
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_STRUCT_ARRAY(chan, BCM2835DMAState, BCM2835_DMA_NCHANS, 1,
vmstate_bcm2835_dma_chan, BCM2835DMAChan),
VMSTATE_UINT32(int_status, BCM2835DMAState),
VMSTATE_UINT32(enable, BCM2835DMAState),
VMSTATE_END_OF_LIST()
}
};
static void bcm2835_dma_init(Object *obj)
{
BCM2835DMAState *s = BCM2835_DMA(obj);
int n;
/* DMA channels 0-14 occupy a contiguous block of IO memory, along
* with the global enable and interrupt status bits. Channel 15
* has the same register map, but is mapped at a discontiguous
* address in a separate IO block.
*/
memory_region_init_io(&s->iomem0, OBJECT(s), &bcm2835_dma0_ops, s,
TYPE_BCM2835_DMA, 0x1000);
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem0);
memory_region_init_io(&s->iomem15, OBJECT(s), &bcm2835_dma15_ops, s,
TYPE_BCM2835_DMA "-chan15", 0x100);
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem15);
for (n = 0; n < 16; n++) {
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->chan[n].irq);
}
}
static void bcm2835_dma_reset(DeviceState *dev)
{
BCM2835DMAState *s = BCM2835_DMA(dev);
int n;
s->enable = 0xffff;
s->int_status = 0;
for (n = 0; n < BCM2835_DMA_NCHANS; n++) {
bcm2835_dma_chan_reset(&s->chan[n]);
}
}
static void bcm2835_dma_realize(DeviceState *dev, Error **errp)
{
BCM2835DMAState *s = BCM2835_DMA(dev);
Error *err = NULL;
Object *obj;
obj = object_property_get_link(OBJECT(dev), "dma-mr", &err);
if (obj == NULL) {
error_setg(errp, "%s: required dma-mr link not found: %s",
__func__, error_get_pretty(err));
return;
}
s->dma_mr = MEMORY_REGION(obj);
address_space_init(&s->dma_as, s->dma_mr, NULL);
bcm2835_dma_reset(dev);
}
static void bcm2835_dma_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = bcm2835_dma_realize;
dc->reset = bcm2835_dma_reset;
dc->vmsd = &vmstate_bcm2835_dma;
}
static TypeInfo bcm2835_dma_info = {
.name = TYPE_BCM2835_DMA,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(BCM2835DMAState),
.class_init = bcm2835_dma_class_init,
.instance_init = bcm2835_dma_init,
};
static void bcm2835_dma_register_types(void)
{
type_register_static(&bcm2835_dma_info);
}
type_init(bcm2835_dma_register_types)

View File

@@ -342,6 +342,10 @@ static void i8257_channel_run(I8257State *d, int ichan)
r->now[COUNT], (r->base[COUNT] + 1) << ncont);
r->now[COUNT] = n;
ldebug ("dma_pos %d size %d\n", n, (r->base[COUNT] + 1) << ncont);
if (n == (r->base[COUNT] + 1) << ncont) {
ldebug("transfer done\n");
d->status |= (1 << ichan);
}
}
static void i8257_dma_run(void *opaque)

View File

@@ -319,6 +319,7 @@ static void imx_i2c_class_init(ObjectClass *klass, void *data)
dc->vmsd = &imx_i2c_vmstate;
dc->reset = imx_i2c_reset;
dc->realize = imx_i2c_realize;
dc->desc = "i.MX I2C Controller";
}
static const TypeInfo imx_i2c_type_info = {

View File

@@ -37,8 +37,8 @@
#include "hw/acpi/bios-linker-loader.h"
#include "hw/loader.h"
#include "hw/isa/isa.h"
#include "hw/block/fdc.h"
#include "hw/acpi/memory_hotplug.h"
#include "hw/mem/nvdimm.h"
#include "sysemu/tpm.h"
#include "hw/acpi/tpm.h"
#include "sysemu/tpm_backend.h"
@@ -76,10 +76,6 @@
#define ACPI_BUILD_DPRINTF(fmt, ...)
#endif
typedef struct AcpiCpuInfo {
DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
} AcpiCpuInfo;
typedef struct AcpiMcfgInfo {
uint64_t mcfg_base;
uint32_t mcfg_size;
@@ -121,31 +117,6 @@ typedef struct AcpiBuildPciBusHotplugState {
bool pcihp_bridge_en;
} AcpiBuildPciBusHotplugState;
static
int acpi_add_cpu_info(Object *o, void *opaque)
{
AcpiCpuInfo *cpu = opaque;
uint64_t apic_id;
if (object_dynamic_cast(o, TYPE_CPU)) {
apic_id = object_property_get_int(o, "apic-id", NULL);
assert(apic_id < ACPI_CPU_HOTPLUG_ID_LIMIT);
set_bit(apic_id, cpu->found_cpus);
}
object_child_foreach(o, acpi_add_cpu_info, opaque);
return 0;
}
static void acpi_get_cpu_info(AcpiCpuInfo *cpu)
{
Object *root = object_get_root();
memset(cpu->found_cpus, 0, sizeof cpu->found_cpus);
object_child_foreach(root, acpi_add_cpu_info, cpu);
}
static void acpi_get_pm_info(AcpiPmInfo *pm)
{
Object *piix = piix4_pm_find();
@@ -362,9 +333,10 @@ build_fadt(GArray *table_data, GArray *linker, AcpiPmInfo *pm,
}
static void
build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu)
build_madt(GArray *table_data, GArray *linker, PCMachineState *pcms)
{
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
MachineClass *mc = MACHINE_GET_CLASS(pcms);
CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms));
int madt_start = table_data->len;
AcpiMultipleApicTable *madt;
@@ -377,18 +349,28 @@ build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu)
madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS);
madt->flags = cpu_to_le32(1);
for (i = 0; i < pcms->apic_id_limit; i++) {
for (i = 0; i < apic_ids->len; i++) {
AcpiMadtProcessorApic *apic = acpi_data_push(table_data, sizeof *apic);
int apic_id = apic_ids->cpus[i].arch_id;
apic->type = ACPI_APIC_PROCESSOR;
apic->length = sizeof(*apic);
apic->processor_id = i;
apic->local_apic_id = i;
if (test_bit(i, cpu->found_cpus)) {
apic->processor_id = apic_id;
apic->local_apic_id = apic_id;
if (apic_ids->cpus[i].cpu != NULL) {
apic->flags = cpu_to_le32(1);
} else {
/* ACPI spec says that LAPIC entry for non present
* CPU may be omitted from MADT or it must be marked
* as disabled. However omitting non present CPU from
* MADT breaks hotplug on linux. So possible CPUs
* should be put in MADT but kept disabled.
*/
apic->flags = cpu_to_le32(0);
}
}
g_free(apic_ids);
io_apic = acpi_data_push(table_data, sizeof *io_apic);
io_apic->type = ACPI_APIC_IO;
io_apic->length = sizeof(*io_apic);
@@ -960,21 +942,24 @@ static Aml *build_crs(PCIHostState *host,
return crs;
}
static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus,
AcpiCpuInfo *cpu, AcpiPmInfo *pm)
static void build_processor_devices(Aml *sb_scope, MachineState *machine,
AcpiPmInfo *pm)
{
int i;
int i, apic_idx;
Aml *dev;
Aml *crs;
Aml *pkg;
Aml *field;
Aml *ifctx;
Aml *method;
MachineClass *mc = MACHINE_GET_CLASS(machine);
CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine);
PCMachineState *pcms = PC_MACHINE(machine);
/* The current AML generator can cover the APIC ID range [0..255],
* inclusive, for VCPU hotplug. */
QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256);
g_assert(acpi_cpus <= ACPI_CPU_HOTPLUG_ID_LIMIT);
g_assert(pcms->apic_id_limit <= ACPI_CPU_HOTPLUG_ID_LIMIT);
/* create PCI0.PRES device and its _CRS to reserve CPU hotplug MMIO */
dev = aml_device("PCI0." stringify(CPU_HOTPLUG_RESOURCE_DEVICE));
@@ -993,28 +978,33 @@ static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus,
aml_append(sb_scope, dev);
/* declare CPU hotplug MMIO region and PRS field to access it */
aml_append(sb_scope, aml_operation_region(
"PRST", AML_SYSTEM_IO, pm->cpu_hp_io_base, pm->cpu_hp_io_len));
"PRST", AML_SYSTEM_IO, aml_int(pm->cpu_hp_io_base), pm->cpu_hp_io_len));
field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("PRS", 256));
aml_append(sb_scope, field);
/* build Processor object for each processor */
for (i = 0; i < acpi_cpus; i++) {
dev = aml_processor(i, 0, 0, "CP%.02X", i);
for (i = 0; i < apic_ids->len; i++) {
int apic_id = apic_ids->cpus[i].arch_id;
assert(apic_id < ACPI_CPU_HOTPLUG_ID_LIMIT);
dev = aml_processor(apic_id, 0, 0, "CP%.02X", apic_id);
method = aml_method("_MAT", 0, AML_NOTSERIALIZED);
aml_append(method,
aml_return(aml_call1(CPU_MAT_METHOD, aml_int(i))));
aml_return(aml_call1(CPU_MAT_METHOD, aml_int(apic_id))));
aml_append(dev, method);
method = aml_method("_STA", 0, AML_NOTSERIALIZED);
aml_append(method,
aml_return(aml_call1(CPU_STATUS_METHOD, aml_int(i))));
aml_return(aml_call1(CPU_STATUS_METHOD, aml_int(apic_id))));
aml_append(dev, method);
method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
aml_append(method,
aml_return(aml_call2(CPU_EJECT_METHOD, aml_int(i), aml_arg(0)))
aml_return(aml_call2(CPU_EJECT_METHOD, aml_int(apic_id),
aml_arg(0)))
);
aml_append(dev, method);
@@ -1026,10 +1016,12 @@ static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus,
*/
/* Arg0 = Processor ID = APIC ID */
method = aml_method(AML_NOTIFY_METHOD, 2, AML_NOTSERIALIZED);
for (i = 0; i < acpi_cpus; i++) {
ifctx = aml_if(aml_equal(aml_arg(0), aml_int(i)));
for (i = 0; i < apic_ids->len; i++) {
int apic_id = apic_ids->cpus[i].arch_id;
ifctx = aml_if(aml_equal(aml_arg(0), aml_int(apic_id)));
aml_append(ifctx,
aml_notify(aml_name("CP%.02X", i), aml_arg(1))
aml_notify(aml_name("CP%.02X", apic_id), aml_arg(1))
);
aml_append(method, ifctx);
}
@@ -1042,14 +1034,20 @@ static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus,
* ith up to 255 elements. Windows guests up to win2k8 fail when
* VarPackageOp is used.
*/
pkg = acpi_cpus <= 255 ? aml_package(acpi_cpus) :
aml_varpackage(acpi_cpus);
pkg = pcms->apic_id_limit <= 255 ? aml_package(pcms->apic_id_limit) :
aml_varpackage(pcms->apic_id_limit);
for (i = 0; i < acpi_cpus; i++) {
uint8_t b = test_bit(i, cpu->found_cpus) ? 0x01 : 0x00;
aml_append(pkg, aml_int(b));
for (i = 0, apic_idx = 0; i < apic_ids->len; i++) {
int apic_id = apic_ids->cpus[i].arch_id;
for (; apic_idx < apic_id; apic_idx++) {
aml_append(pkg, aml_int(0));
}
aml_append(pkg, aml_int(apic_ids->cpus[i].cpu ? 1 : 0));
apic_idx = apic_id + 1;
}
aml_append(sb_scope, aml_name_decl(CPU_ON_BITMAP, pkg));
g_free(apic_ids);
}
static void build_memory_devices(Aml *sb_scope, int nr_mem,
@@ -1078,7 +1076,7 @@ static void build_memory_devices(Aml *sb_scope, int nr_mem,
aml_append(scope, aml_operation_region(
MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO,
io_base, io_len)
aml_int(io_base), io_len)
);
field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_DWORD_ACC,
@@ -1192,7 +1190,8 @@ static void build_hpet_aml(Aml *table)
aml_append(dev, aml_name_decl("_UID", zero));
aml_append(dev,
aml_operation_region("HPTM", AML_SYSTEM_MEMORY, HPET_BASE, HPET_LEN));
aml_operation_region("HPTM", AML_SYSTEM_MEMORY, aml_int(HPET_BASE),
HPET_LEN));
field = aml_field("HPTM", AML_DWORD_ACC, AML_LOCK, AML_PRESERVE);
aml_append(field, aml_named_field("VEND", 32));
aml_append(field, aml_named_field("PRD", 32));
@@ -1227,33 +1226,63 @@ static void build_hpet_aml(Aml *table)
aml_append(table, scope);
}
static Aml *build_fdc_device_aml(void)
static Aml *build_fdinfo_aml(int idx, FloppyDriveType type)
{
Aml *dev, *fdi;
uint8_t maxc, maxh, maxs;
isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs);
dev = aml_device("FLP%c", 'A' + idx);
aml_append(dev, aml_name_decl("_ADR", aml_int(idx)));
fdi = aml_package(16);
aml_append(fdi, aml_int(idx)); /* Drive Number */
aml_append(fdi,
aml_int(cmos_get_fd_drive_type(type))); /* Device Type */
/*
* the values below are the limits of the drive, and are thus independent
* of the inserted media
*/
aml_append(fdi, aml_int(maxc)); /* Maximum Cylinder Number */
aml_append(fdi, aml_int(maxs)); /* Maximum Sector Number */
aml_append(fdi, aml_int(maxh)); /* Maximum Head Number */
/*
* SeaBIOS returns the below values for int 0x13 func 0x08 regardless of
* the drive type, so shall we
*/
aml_append(fdi, aml_int(0xAF)); /* disk_specify_1 */
aml_append(fdi, aml_int(0x02)); /* disk_specify_2 */
aml_append(fdi, aml_int(0x25)); /* disk_motor_wait */
aml_append(fdi, aml_int(0x02)); /* disk_sector_siz */
aml_append(fdi, aml_int(0x12)); /* disk_eot */
aml_append(fdi, aml_int(0x1B)); /* disk_rw_gap */
aml_append(fdi, aml_int(0xFF)); /* disk_dtl */
aml_append(fdi, aml_int(0x6C)); /* disk_formt_gap */
aml_append(fdi, aml_int(0xF6)); /* disk_fill */
aml_append(fdi, aml_int(0x0F)); /* disk_head_sttl */
aml_append(fdi, aml_int(0x08)); /* disk_motor_strt */
aml_append(dev, aml_name_decl("_FDI", fdi));
return dev;
}
static Aml *build_fdc_device_aml(ISADevice *fdc)
{
int i;
Aml *dev;
Aml *crs;
Aml *method;
Aml *if_ctx;
Aml *else_ctx;
Aml *zero = aml_int(0);
Aml *is_present = aml_local(0);
#define ACPI_FDE_MAX_FD 4
uint32_t fde_buf[5] = {
0, 0, 0, 0, /* presence of floppy drives #0 - #3 */
cpu_to_le32(2) /* tape presence (2 == never present) */
};
dev = aml_device("FDC0");
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700")));
method = aml_method("_STA", 0, AML_NOTSERIALIZED);
aml_append(method, aml_store(aml_name("FDEN"), is_present));
if_ctx = aml_if(aml_equal(is_present, zero));
{
aml_append(if_ctx, aml_return(aml_int(0x00)));
}
aml_append(method, if_ctx);
else_ctx = aml_else();
{
aml_append(else_ctx, aml_return(aml_int(0x0f)));
}
aml_append(method, else_ctx);
aml_append(dev, method);
crs = aml_resource_template();
aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04));
aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01));
@@ -1262,6 +1291,17 @@ static Aml *build_fdc_device_aml(void)
aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2));
aml_append(dev, aml_name_decl("_CRS", crs));
for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) {
FloppyDriveType type = isa_fdc_get_drive_type(fdc, i);
if (type < FLOPPY_DRIVE_TYPE_NONE) {
fde_buf[i] = cpu_to_le32(1); /* drive present */
aml_append(dev, build_fdinfo_aml(i, type));
}
}
aml_append(dev, aml_name_decl("_FDE",
aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf)));
return dev;
}
@@ -1406,12 +1446,16 @@ static Aml *build_com_device_aml(uint8_t uid)
static void build_isa_devices_aml(Aml *table)
{
ISADevice *fdc = pc_find_fdc0();
Aml *scope = aml_scope("_SB.PCI0.ISA");
aml_append(scope, build_rtc_device_aml());
aml_append(scope, build_kbd_device_aml());
aml_append(scope, build_mouse_device_aml());
aml_append(scope, build_fdc_device_aml());
if (fdc) {
aml_append(scope, build_fdc_device_aml(fdc));
}
aml_append(scope, build_lpt_device_aml());
aml_append(scope, build_com_device_aml(1));
aml_append(scope, build_com_device_aml(2));
@@ -1430,7 +1474,7 @@ static void build_dbg_aml(Aml *table)
Aml *idx = aml_local(2);
aml_append(scope,
aml_operation_region("DBG", AML_SYSTEM_IO, 0x0402, 0x01));
aml_operation_region("DBG", AML_SYSTEM_IO, aml_int(0x0402), 0x01));
field = aml_field("DBG", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("DBGB", 8));
aml_append(scope, field);
@@ -1509,6 +1553,12 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
aml_append(dev, aml_name_decl("_CRS", crs));
/*
* _DIS can be no-op because the interrupt cannot be disabled.
*/
method = aml_method("_DIS", 0, AML_NOTSERIALIZED);
aml_append(dev, method);
method = aml_method("_SRS", 1, AML_NOTSERIALIZED);
aml_append(dev, method);
@@ -1742,18 +1792,14 @@ static void build_q35_pci0_int(Aml *table)
aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG")));
aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH")));
/*
* TODO: UID probably shouldn't be the same for GSIx devices
* but that's how it was in original ASL so keep it for now
*/
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0, 0x10));
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0, 0x11));
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0, 0x12));
aml_append(sb_scope, build_gsi_link_dev("GSID", 0, 0x13));
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0, 0x14));
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0, 0x15));
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0, 0x16));
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0, 0x17));
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10));
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11));
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12));
aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13));
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14));
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15));
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16));
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17));
aml_append(table, sb_scope);
}
@@ -1770,28 +1816,25 @@ static void build_q35_isa_bridge(Aml *table)
/* ICH9 PCI to ISA irq remapping */
aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG,
0x60, 0x0C));
aml_int(0x60), 0x0C));
aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG,
0x80, 0x02));
aml_int(0x80), 0x02));
field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("COMA", 3));
aml_append(field, aml_reserved_field(1));
aml_append(field, aml_named_field("COMB", 3));
aml_append(field, aml_reserved_field(1));
aml_append(field, aml_named_field("LPTD", 2));
aml_append(field, aml_reserved_field(2));
aml_append(field, aml_named_field("FDCD", 2));
aml_append(dev, field);
aml_append(dev, aml_operation_region("LPCE", AML_PCI_CONFIG,
0x82, 0x02));
aml_int(0x82), 0x02));
/* enable bits */
field = aml_field("LPCE", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("CAEN", 1));
aml_append(field, aml_named_field("CBEN", 1));
aml_append(field, aml_named_field("LPEN", 1));
aml_append(field, aml_named_field("FDEN", 1));
aml_append(dev, field);
aml_append(scope, dev);
@@ -1808,7 +1851,7 @@ static void build_piix4_pm(Aml *table)
aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010003)));
aml_append(dev, aml_operation_region("P13C", AML_PCI_CONFIG,
0x00, 0xff));
aml_int(0x00), 0xff));
aml_append(scope, dev);
aml_append(table, scope);
}
@@ -1825,7 +1868,7 @@ static void build_piix4_isa_bridge(Aml *table)
/* PIIX PCI to ISA irq remapping */
aml_append(dev, aml_operation_region("P40C", AML_PCI_CONFIG,
0x60, 0x04));
aml_int(0x60), 0x04));
/* enable bits */
field = aml_field("^PX13.P13C", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
/* Offset(0x5f),, 7, */
@@ -1839,7 +1882,6 @@ static void build_piix4_isa_bridge(Aml *table)
aml_append(field, aml_reserved_field(3));
aml_append(field, aml_named_field("CBEN", 1));
aml_append(dev, field);
aml_append(dev, aml_name_decl("FDEN", aml_int(1)));
aml_append(scope, dev);
aml_append(table, scope);
@@ -1854,20 +1896,20 @@ static void build_piix4_pci_hotplug(Aml *table)
scope = aml_scope("_SB.PCI0");
aml_append(scope,
aml_operation_region("PCST", AML_SYSTEM_IO, 0xae00, 0x08));
aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(0xae00), 0x08));
field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
aml_append(field, aml_named_field("PCIU", 32));
aml_append(field, aml_named_field("PCID", 32));
aml_append(scope, field);
aml_append(scope,
aml_operation_region("SEJ", AML_SYSTEM_IO, 0xae08, 0x04));
aml_operation_region("SEJ", AML_SYSTEM_IO, aml_int(0xae08), 0x04));
field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
aml_append(field, aml_named_field("B0EJ", 32));
aml_append(scope, field);
aml_append(scope,
aml_operation_region("BNMR", AML_SYSTEM_IO, 0xae10, 0x04));
aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x04));
field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
aml_append(field, aml_named_field("BNUM", 32));
aml_append(scope, field);
@@ -1937,14 +1979,13 @@ static Aml *build_q35_osc_method(void)
static void
build_dsdt(GArray *table_data, GArray *linker,
AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc,
PcPciInfo *pci)
AcpiPmInfo *pm, AcpiMiscInfo *misc,
PcPciInfo *pci, MachineState *machine)
{
CrsRangeEntry *entry;
Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs;
GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free);
GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free);
MachineState *machine = MACHINE(qdev_get_machine());
PCMachineState *pcms = PC_MACHINE(machine);
uint32_t nr_mem = machine->ram_slots;
int root_bus_limit = 0xFF;
@@ -1975,9 +2016,9 @@ build_dsdt(GArray *table_data, GArray *linker,
} else {
sb_scope = aml_scope("_SB");
aml_append(sb_scope,
aml_operation_region("PCST", AML_SYSTEM_IO, 0xae00, 0x0c));
aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(0xae00), 0x0c));
aml_append(sb_scope,
aml_operation_region("PCSB", AML_SYSTEM_IO, 0xae0c, 0x01));
aml_operation_region("PCSB", AML_SYSTEM_IO, aml_int(0xae0c), 0x01));
field = aml_field("PCSB", AML_ANY_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
aml_append(field, aml_named_field("PCIB", 8));
aml_append(sb_scope, field);
@@ -2190,6 +2231,35 @@ build_dsdt(GArray *table_data, GArray *linker,
aml_append(scope, aml_name_decl("_S5", pkg));
aml_append(dsdt, scope);
/* create fw_cfg node, unconditionally */
{
/* when using port i/o, the 8-bit data register *always* overlaps
* with half of the 16-bit control register. Hence, the total size
* of the i/o region used is FW_CFG_CTL_SIZE; when using DMA, the
* DMA control register is located at FW_CFG_DMA_IO_BASE + 4 */
uint8_t io_size = object_property_get_bool(OBJECT(pcms->fw_cfg),
"dma_enabled", NULL) ?
ROUND_UP(FW_CFG_CTL_SIZE, 4) + sizeof(dma_addr_t) :
FW_CFG_CTL_SIZE;
scope = aml_scope("\\_SB.PCI0");
dev = aml_device("FWCF");
aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
/* device present, functioning, decoding, not shown in UI */
aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
crs = aml_resource_template();
aml_append(crs,
aml_io(AML_DECODE16, FW_CFG_IO_BASE, FW_CFG_IO_BASE, 0x01, io_size)
);
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
aml_append(dsdt, scope);
}
if (misc->applesmc_io_base) {
scope = aml_scope("\\_SB.PCI0.ISA");
dev = aml_device("SMC");
@@ -2223,7 +2293,7 @@ build_dsdt(GArray *table_data, GArray *linker,
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(dev, aml_operation_region("PEOR", AML_SYSTEM_IO,
misc->pvpanic_port, 1));
aml_int(misc->pvpanic_port), 1));
field = aml_field("PEOR", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
aml_append(field, aml_named_field("PEPT", 8));
aml_append(dev, field);
@@ -2246,7 +2316,7 @@ build_dsdt(GArray *table_data, GArray *linker,
sb_scope = aml_scope("\\_SB");
{
build_processor_devices(sb_scope, pcms->apic_id_limit, cpu, pm);
build_processor_devices(sb_scope, machine, pm);
build_memory_devices(sb_scope, nr_mem, pm->mem_hp_io_base,
pm->mem_hp_io_len);
@@ -2367,7 +2437,7 @@ acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
}
static void
build_srat(GArray *table_data, GArray *linker)
build_srat(GArray *table_data, GArray *linker, MachineState *machine)
{
AcpiSystemResourceAffinityTable *srat;
AcpiSratProcessorAffinity *core;
@@ -2377,7 +2447,9 @@ build_srat(GArray *table_data, GArray *linker)
uint64_t curnode;
int srat_start, numa_start, slots;
uint64_t mem_len, mem_base, next_base;
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
MachineClass *mc = MACHINE_GET_CLASS(machine);
CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine);
PCMachineState *pcms = PC_MACHINE(machine);
ram_addr_t hotplugabble_address_space_size =
object_property_get_int(OBJECT(pcms), PC_MACHINE_MEMHP_REGION_SIZE,
NULL);
@@ -2386,14 +2458,15 @@ build_srat(GArray *table_data, GArray *linker)
srat = acpi_data_push(table_data, sizeof *srat);
srat->reserved1 = cpu_to_le32(1);
core = (void *)(srat + 1);
for (i = 0; i < pcms->apic_id_limit; ++i) {
for (i = 0; i < apic_ids->len; i++) {
int apic_id = apic_ids->cpus[i].arch_id;
core = acpi_data_push(table_data, sizeof *core);
core->type = ACPI_SRAT_PROCESSOR;
core->length = sizeof(*core);
core->local_apic_id = i;
curnode = pcms->node_cpu[i];
core->local_apic_id = apic_id;
curnode = pcms->node_cpu[apic_id];
core->proximity_lo = curnode;
memset(core->proximity_hi, 0, 3);
core->local_sapic_eid = 0;
@@ -2458,6 +2531,7 @@ build_srat(GArray *table_data, GArray *linker)
(void *)(table_data->data + srat_start),
"SRAT",
table_data->len - srat_start, 1, NULL, NULL);
g_free(apic_ids);
}
static void
@@ -2581,21 +2655,13 @@ static bool acpi_has_iommu(void)
return intel_iommu && !ambiguous;
}
static bool acpi_has_nvdimm(void)
{
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
return pcms->nvdimm;
}
static
void acpi_build(AcpiBuildTables *tables)
void acpi_build(AcpiBuildTables *tables, MachineState *machine)
{
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
PCMachineState *pcms = PC_MACHINE(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
GArray *table_offsets;
unsigned facs, dsdt, rsdt, fadt;
AcpiCpuInfo cpu;
AcpiPmInfo pm;
AcpiMiscInfo misc;
AcpiMcfgInfo mcfg;
@@ -2605,7 +2671,6 @@ void acpi_build(AcpiBuildTables *tables)
GArray *tables_blob = tables->table_data;
AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
acpi_get_cpu_info(&cpu);
acpi_get_pm_info(&pm);
acpi_get_misc_info(&misc);
acpi_get_pci_info(&pci);
@@ -2629,7 +2694,7 @@ void acpi_build(AcpiBuildTables *tables)
/* DSDT is pointed to by FADT */
dsdt = tables_blob->len;
build_dsdt(tables_blob, tables->linker, &cpu, &pm, &misc, &pci);
build_dsdt(tables_blob, tables->linker, &pm, &misc, &pci, machine);
/* Count the size of the DSDT and SSDT, we will need it for legacy
* sizing of ACPI tables.
@@ -2644,7 +2709,7 @@ void acpi_build(AcpiBuildTables *tables)
aml_len += tables_blob->len - fadt;
acpi_add_table(table_offsets, tables_blob);
build_madt(tables_blob, tables->linker, &cpu);
build_madt(tables_blob, tables->linker, pcms);
if (misc.has_hpet) {
acpi_add_table(table_offsets, tables_blob);
@@ -2661,7 +2726,7 @@ void acpi_build(AcpiBuildTables *tables)
}
if (pcms->numa_nodes) {
acpi_add_table(table_offsets, tables_blob);
build_srat(tables_blob, tables->linker);
build_srat(tables_blob, tables->linker, machine);
}
if (acpi_get_mcfg(&mcfg)) {
acpi_add_table(table_offsets, tables_blob);
@@ -2671,8 +2736,7 @@ void acpi_build(AcpiBuildTables *tables)
acpi_add_table(table_offsets, tables_blob);
build_dmar_q35(tables_blob, tables->linker);
}
if (acpi_has_nvdimm()) {
if (pcms->acpi_nvdimm_state.is_enabled) {
nvdimm_build_acpi(table_offsets, tables_blob, tables->linker);
}
@@ -2766,7 +2830,7 @@ static void acpi_build_update(void *build_opaque)
acpi_build_tables_init(&tables);
acpi_build(&tables);
acpi_build(&tables, MACHINE(qdev_get_machine()));
acpi_ram_update(build_state->table_mr, tables.table_data);
@@ -2831,7 +2895,7 @@ void acpi_setup(void)
acpi_set_pci_info();
acpi_build_tables_init(&tables);
acpi_build(&tables);
acpi_build(&tables, MACHINE(pcms));
/* Now expose it all to Guest */
build_state->table_mr = acpi_add_rom_blob(build_state, tables.table_data,

View File

@@ -186,7 +186,7 @@ static void kvm_apic_realize(DeviceState *dev, Error **errp)
APIC_SPACE_SIZE);
if (kvm_has_gsi_routing()) {
msi_supported = true;
msi_nonbroken = true;
}
}

View File

@@ -196,7 +196,8 @@ int load_multiboot(FWCfgState *fw_cfg,
}
kernel_size = load_elf(kernel_filename, NULL, NULL, &elf_entry,
&elf_low, &elf_high, 0, I386_ELF_MACHINE, 0);
&elf_low, &elf_high, 0, I386_ELF_MACHINE,
0, 0);
if (kernel_size < 0) {
fprintf(stderr, "Error while loading elf kernel\n");
exit(1);

View File

@@ -78,7 +78,6 @@
#define DPRINTF(fmt, ...)
#endif
#define BIOS_CFG_IOPORT 0x510
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
@@ -200,7 +199,7 @@ static void pic_irq_request(void *opaque, int irq, int level)
#define REG_EQUIPMENT_BYTE 0x14
static int cmos_get_fd_drive_type(FloppyDriveType fd0)
int cmos_get_fd_drive_type(FloppyDriveType fd0)
{
int val;
@@ -700,18 +699,6 @@ static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index)
}
}
/* Calculates the limit to CPU APIC ID values
*
* This function returns the limit for the APIC ID value, so that all
* CPU APIC IDs are < pc_apic_id_limit().
*
* This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
*/
static unsigned int pc_apic_id_limit(unsigned int max_cpus)
{
return x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
}
static void pc_build_smbios(FWCfgState *fw_cfg)
{
uint8_t *smbios_tables, *smbios_anchor;
@@ -749,14 +736,13 @@ static void pc_build_smbios(FWCfgState *fw_cfg)
}
}
static FWCfgState *bochs_bios_init(AddressSpace *as)
static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
{
FWCfgState *fw_cfg;
uint64_t *numa_fw_cfg;
int i, j;
unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
fw_cfg = fw_cfg_init_io_dma(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 4, as);
fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as);
/* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
*
@@ -772,7 +758,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as)
* [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
* the APIC ID, not the "CPU index"
*/
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit);
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
acpi_tables, acpi_tables_len);
@@ -790,11 +776,11 @@ static FWCfgState *bochs_bios_init(AddressSpace *as)
* of nodes, one word for each VCPU->node and one word for each node to
* hold the amount of memory.
*/
numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes);
numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
for (i = 0; i < max_cpus; i++) {
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
assert(apic_id < apic_id_limit);
assert(apic_id < pcms->apic_id_limit);
for (j = 0; j < nb_numa_nodes; j++) {
if (test_bit(i, numa_info[j].node_cpu)) {
numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
@@ -803,10 +789,11 @@ static FWCfgState *bochs_bios_init(AddressSpace *as)
}
}
for (i = 0; i < nb_numa_nodes; i++) {
numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
numa_fw_cfg[pcms->apic_id_limit + 1 + i] =
cpu_to_le64(numa_info[i].node_mem);
}
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
(1 + apic_id_limit + nb_numa_nodes) *
(1 + pcms->apic_id_limit + nb_numa_nodes) *
sizeof(*numa_fw_cfg));
return fw_cfg;
@@ -1120,7 +1107,6 @@ void pc_cpus_init(PCMachineState *pcms)
int i;
X86CPU *cpu = NULL;
MachineState *machine = MACHINE(pcms);
unsigned long apic_id_limit;
/* init CPUs */
if (machine->cpu_model == NULL) {
@@ -1131,17 +1117,31 @@ void pc_cpus_init(PCMachineState *pcms)
#endif
}
apic_id_limit = pc_apic_id_limit(max_cpus);
if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
error_report("max_cpus is too large. APIC ID of last CPU is %lu",
apic_id_limit - 1);
/* Calculates the limit to CPU APIC ID values
*
* Limit for the APIC ID value, so that all
* CPU APIC IDs are < pcms->apic_id_limit.
*
* This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
*/
pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
error_report("max_cpus is too large. APIC ID of last CPU is %u",
pcms->apic_id_limit - 1);
exit(1);
}
for (i = 0; i < smp_cpus; i++) {
cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i),
&error_fatal);
object_unref(OBJECT(cpu));
pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
sizeof(CPUArchId) * max_cpus);
for (i = 0; i < max_cpus; i++) {
pcms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
pcms->possible_cpus->len++;
if (i < smp_cpus) {
cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i),
&error_fatal);
pcms->possible_cpus->cpus[i].cpu = CPU(cpu);
object_unref(OBJECT(cpu));
}
}
/* tell smbios about cpuid version and features */
@@ -1187,7 +1187,6 @@ void pc_guest_info_init(PCMachineState *pcms)
{
int i, j;
pcms->apic_id_limit = pc_apic_id_limit(max_cpus);
pcms->apic_xrupt_override = kvm_allows_irq0_override();
pcms->numa_nodes = nb_numa_nodes;
pcms->node_mem = g_malloc0(pcms->numa_nodes *
@@ -1258,7 +1257,7 @@ void xen_load_linux(PCMachineState *pcms)
assert(MACHINE(pcms)->kernel_filename != NULL);
fw_cfg = fw_cfg_init_io(BIOS_CFG_IOPORT);
fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE);
rom_set_fw(fw_cfg);
load_linux(pcms, fw_cfg);
@@ -1372,7 +1371,7 @@ void pc_memory_init(PCMachineState *pcms,
option_rom_mr,
1);
fw_cfg = bochs_bios_init(&address_space_memory);
fw_cfg = bochs_bios_init(&address_space_memory, pcms);
rom_set_fw(fw_cfg);
@@ -1665,9 +1664,19 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev,
error_propagate(errp, local_err);
}
static int pc_apic_cmp(const void *a, const void *b)
{
CPUArchId *apic_a = (CPUArchId *)a;
CPUArchId *apic_b = (CPUArchId *)b;
return apic_a->arch_id - apic_b->arch_id;
}
static void pc_cpu_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
CPUClass *cc = CPU_GET_CLASS(dev);
CPUArchId apic_id, *found_cpu;
HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
@@ -1690,6 +1699,13 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev,
/* increment the number of CPUs */
rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1);
apic_id.arch_id = cc->get_arch_id(CPU(dev));
found_cpu = bsearch(&apic_id, pcms->possible_cpus->cpus,
pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus),
pc_apic_cmp);
assert(found_cpu);
found_cpu->cpu = CPU(dev);
out:
error_propagate(errp, local_err);
}
@@ -1854,14 +1870,14 @@ static bool pc_machine_get_nvdimm(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->nvdimm;
return pcms->acpi_nvdimm_state.is_enabled;
}
static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->nvdimm = value;
pcms->acpi_nvdimm_state.is_enabled = value;
}
static void pc_machine_initfn(Object *obj)
@@ -1900,7 +1916,7 @@ static void pc_machine_initfn(Object *obj)
&error_abort);
/* nvdimm is disabled on default. */
pcms->nvdimm = false;
pcms->acpi_nvdimm_state.is_enabled = false;
object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm,
pc_machine_set_nvdimm, &error_abort);
}
@@ -1932,6 +1948,17 @@ static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
return topo.pkg_id;
}
static CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *machine)
{
PCMachineState *pcms = PC_MACHINE(machine);
int len = sizeof(CPUArchIdList) +
sizeof(CPUArchId) * (pcms->possible_cpus->len);
CPUArchIdList *list = g_malloc(len);
memcpy(list, pcms->possible_cpus, len);
return list;
}
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1954,6 +1981,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->save_tsc_khz = true;
mc->get_hotplug_handler = pc_get_hotpug_handler;
mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
mc->default_boot_order = "cad";
mc->hot_add_cpu = pc_hot_add_cpu;
mc->max_cpus = 255;

View File

@@ -274,6 +274,11 @@ static void pc_init1(MachineState *machine,
if (pcmc->pci_enabled) {
pc_pci_device_init(pci_bus);
}
if (pcms->acpi_nvdimm_state.is_enabled) {
nvdimm_init_acpi_state(&pcms->acpi_nvdimm_state, system_io,
pcms->fw_cfg, OBJECT(pcms));
}
}
/* Looking for a pc_compat_2_4() function? It doesn't exist.

Some files were not shown because too many files have changed in this diff Show More