Compare commits

...

449 Commits

Author SHA1 Message Date
Gerd Hoffmann
2dc120beb8 vnc: fix double free issues
Reported by Coverity: CID 1371242, 1371243, 1371244.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487682332-29154-1-git-send-email-kraxel@redhat.com
2017-02-27 16:22:01 +01:00
Gerd Hoffmann
8bf69b499a spice: add display & head options
This allows specifying the display and head to use, similar to vnc.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1487663858-11731-1-git-send-email-kraxel@redhat.com
2017-02-27 16:21:23 +01:00
Daniel P. Berrange
857e479552 ui: Use XkbGetMap and XkbGetNames instead of XkbGetKeyboard
XkbGetKeyboard does not work in XWayland and even on non-Wayland
X11 servers its use is discouraged:

  https://bugs.freedesktop.org/show_bug.cgi?id=89240

This resolves a problem whereby QEMU prints

  "could not lookup keycode name"

on startup when running under XWayland. Keymap handling is,
however, still broken after this commit, since XWayland is
reporting a keymap we can't handle:

  "unknown keycodes `(unnamed)', please report to qemu-devel@nongnu.org"

NB, native Wayland support (which is the default under GTK3) is
not affected - only XWayland (which can be requested with GDK_BACKEND
on GTK3, and is the only option for GTK2).

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170227132343.30824-1-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-27 16:19:47 +01:00
Gerd Hoffmann
543a7a161f gtk-egl: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-7-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
db6cdfbeba sdl2: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-6-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
46ffd0c031 spice: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-5-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
975896fc88 virtio-gpu: use dpy_gl_scanout_disable
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-4-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
eaa92c76ce console: add dpy_gl_scanout_disable
Helper function (and DisplayChangeListenerOps ptr) to disable scanouts.
Replaces using dpy_gl_scanout_texture with 0x0 size and no texture
specified.

Allows cleanups to make the io and gfx emulation code more readable.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-3-git-send-email-kraxel@redhat.com
2017-02-27 16:15:28 +01:00
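
A minimal, self-contained sketch of the pattern this commit describes, using hypothetical names (GfxListenerOps, gl_scanout_disable) rather than QEMU's actual console types:

    #include <stdio.h>

    /* Hypothetical listener ops: a dedicated scanout_disable callback
     * instead of encoding "disable" as a 0x0 scanout_texture call. */
    typedef struct GfxListenerOps {
        void (*scanout_texture)(unsigned tex_id, int w, int h);
        void (*scanout_disable)(void);
    } GfxListenerOps;

    static void ui_scanout_texture(unsigned tex_id, int w, int h)
    {
        printf("scanout %dx%d from texture %u\n", w, h, tex_id);
    }

    static void ui_scanout_disable(void)
    {
        printf("scanout disabled\n");
    }

    static const GfxListenerOps ops = {
        .scanout_texture = ui_scanout_texture,
        .scanout_disable = ui_scanout_disable,
    };

    /* Helper in the spirit of dpy_gl_scanout_disable(): fall back to the
     * old "0x0 size, no texture" idiom if the backend lacks the callback. */
    static void gl_scanout_disable(void)
    {
        if (ops.scanout_disable) {
            ops.scanout_disable();
        } else {
            ops.scanout_texture(0, 0, 0);
        }
    }

    int main(void)
    {
        gl_scanout_disable();
        return 0;
    }
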
Gerd Hoffmann
f4c36bdab6 console: rename dpy_gl_scanout to dpy_gl_scanout_texture
We'll add a variant which accepts dmabufs soon.  Change
the name so we can easily distinguish the two variants.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-2-git-send-email-kraxel@redhat.com
2017-02-27 16:15:28 +01:00
Paolo Bonzini
3b1d816984 tests-aio-multithread: use atomic_read properly
nodes[id].next is written by other threads.  If atomic_read is not used
(matching atomic_set in mcs_mutex_lock!) the compiler can optimize the
whole "if" away!

Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Greg Kurz <groug@kaod.org>
Message-id: 20170227111726.9237-1-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 12:54:08 +00:00
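
For illustration, a standalone sketch of why the paired accessors matter, using C11 atomics in place of QEMU's atomic_read()/atomic_set() and a made-up node layout:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct node {
        _Atomic int next;               /* written by other threads */
    };

    static struct node nodes[64];

    /* Writer side: plays the role of atomic_set() in mcs_mutex_lock. */
    static void set_next(int id, int next)
    {
        atomic_store_explicit(&nodes[id].next, next, memory_order_relaxed);
    }

    /* Reader side: a plain load of nodes[id].next would let the compiler
     * assume the value never changes and optimize the check away; the
     * relaxed atomic load forces a real read that pairs with the store. */
    static bool has_next(int id)
    {
        return atomic_load_explicit(&nodes[id].next,
                                    memory_order_relaxed) != -1;
    }

    int main(void)
    {
        set_next(0, -1);
        set_next(0, 1);
        return has_next(0) ? 0 : 1;
    }
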
Peter Maydell
d992f2f136 Merge remote-tracking branch 'remotes/artyom/tags/pull-sun4v-20170226' into staging
Pull request for Niagara patches 2017 02 26

# gpg: Signature made Sun 26 Feb 2017 21:56:06 GMT
# gpg:                using RSA key 0x3360C3F7411A125F
# gpg: Good signature from "Artyom Tarasenko <atar4qemu@gmail.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2AD8 6149 17F4 B2D7 05C0  BB12 3360 C3F7 411A 125F

* remotes/artyom/tags/pull-sun4v-20170226:
  niagara: check if a serial port is available
  niagara: fail if a firmware file is missing

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 22:40:23 +00:00
Artyom Tarasenko
a5a08302d4 niagara: check if a serial port is available
Reported-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Artyom Tarasenko <atar4qemu@gmail.com>
2017-02-26 22:46:08 +01:00
Artyom Tarasenko
5e3a549498 niagara: fail if a firmware file is missing
Fail if a firmware file is missing and not qtest_enabled();
the latter check is necessary to allow some basic tests when
firmware is not available.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Artyom Tarasenko <atar4qemu@gmail.com>
2017-02-26 22:44:25 +01:00
Peter Maydell
685783c5b6 Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging
slirp updates

# gpg: Signature made Sun 26 Feb 2017 14:40:00 GMT
# gpg:                using RSA key 0xB0A51BF58C9179C5
# gpg: Good signature from "Samuel Thibault <samuel.thibault@aquilenet.fr>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@u-bordeaux.fr>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: AEBF 7448 FAB9 453A 4552  390E B0A5 1BF5 8C91 79C5

* remotes/thibault/tags/samuel-thibault:
  slirp: tcp_listen(): Don't try to close() an fd we never opened
  slirp: Convert mbufs to use g_malloc() and g_free()
  slirp: Check qemu_socket() return value in udp_listen()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 16:38:40 +00:00
Peter Maydell
bd5d2353aa slirp: tcp_listen(): Don't try to close() an fd we never opened
Coverity points out (CID 1005725) that an error-exit path in tcp_listen()
will try to close(s) even if the reason it got there was that the
qemu_socket() failed and s was never opened.  Not only that, this isn't even
the right function to use, because we need closesocket() to do the right
thing on Windows.  Change to using the right function and only calling it if
needed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:39:29 +01:00
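
A small sketch of the fixed error-path pattern, with a stand-in listen_sketch() instead of slirp's tcp_listen() and plain socket() instead of qemu_socket():

    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* On Windows this would have to be winsock's closesocket(); plain
     * close() is only correct for POSIX descriptors. */
    #define closesocket(s) close(s)

    static int listen_sketch(void)
    {
        int s = socket(AF_INET, SOCK_STREAM, 0);   /* stands in for qemu_socket() */

        if (s < 0) {
            goto err;              /* nothing was opened, nothing to close */
        }
        if (listen(s, 1) < 0) {
            closesocket(s);        /* opened: close via the socket API */
            goto err;
        }
        return s;
    err:
        perror("listen_sketch");
        return -1;
    }

    int main(void)
    {
        int s = listen_sketch();
        if (s >= 0) {
            closesocket(s);
        }
        return s >= 0 ? 0 : 1;
    }
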
Peter Maydell
70f2e64e4d slirp: Convert mbufs to use g_malloc() and g_free()
The mbuf code currently doesn't check the result of doing a malloc()
or realloc() of its data (spotted by Coverity, CID 1238946).
Since the m_inc() API assumes that extending an mbuf must succeed,
just convert to g_malloc() and g_free().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:39:05 +01:00
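
The conversion pattern in a nutshell, as a standalone sketch (assumes GLib; m_inc_sketch is a made-up stand-in for slirp's m_inc()):

    #include <glib.h>
    #include <string.h>

    struct mbuf_sketch {
        char  *m_data;
        size_t m_size;
    };

    /* g_realloc() aborts on allocation failure, so the caller's
     * "extending an mbuf must succeed" assumption holds without a
     * NULL check, unlike plain realloc(). */
    static void m_inc_sketch(struct mbuf_sketch *m, size_t newsize)
    {
        if (newsize > m->m_size) {
            m->m_data = g_realloc(m->m_data, newsize);
            m->m_size = newsize;
        }
    }

    int main(void)
    {
        struct mbuf_sketch m = { g_malloc(16), 16 };

        memcpy(m.m_data, "hello", 6);
        m_inc_sketch(&m, 64);
        g_free(m.m_data);
        return 0;
    }
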
Peter Maydell
4577b09a27 slirp: Check qemu_socket() return value in udp_listen()
Check the return value from qemu_socket() rather than trying to
pass it to bind() as an fd argument even if it's negative.
This wouldn't have caused any negative consequences, because
it won't be a valid fd number and the bind call will fail;
but Coverity complains (CID 1005723).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:38:38 +01:00
Peter Maydell
6b4e463ff3 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Fri 24 Feb 2017 18:08:26 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  tests: Use opened block node for block job tests
  vvfat: Use opened node as backing file
  block: Add bdrv_new_open_driver()
  block: Factor out bdrv_open_driver()
  block: Use BlockBackend for image probing
  block: Factor out bdrv_open_child_bs()
  block: Attach bs->file only during .bdrv_open()
  block: Pass BdrvChild to bdrv_truncate()
  mirror: Resize active commit base in mirror_run()
  qcow2: Use BB for resizing in qcow2_amend_options()
  blockdev: Use BlockBackend to resize in qmp_block_resize()
  iotests: Fix another race in 030
  qemu-img: Improve documentation for PREALLOC_MODE_FALLOC
  qemu-img: Truncate before full preallocation
  qemu-img: Add tests for raw image preallocation
  qemu-img: Do not truncate before preallocation
  qemu-iotests: redirect nbd server stdout to /dev/null
  qemu-iotests: add ability to exclude certain protocols from tests
  qemu-iotests: Test 137 only supports 'file' protocol

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 12:26:37 +00:00
Peter Maydell
6528a4c1f2 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Fri 24 Feb 2017 17:45:53 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  RBD: Add support readv,writev for rbd
  block/nfs: try to avoid the bounce buffer in pwritev
  block/nfs: convert to preadv / pwritev

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 11:47:00 +00:00
Peter Maydell
6d3f4c6d1d Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170224-2' into staging
MIPS patches 2017-02-24-2

Changes:
* Add the Boston board, with a fix for the make check issue on 32-bit hosts.

# gpg: Signature made Fri 24 Feb 2017 11:43:45 GMT
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170224-2:
  hw/mips: MIPS Boston board support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 21:15:14 +00:00
Peter Maydell
28f997a82c Merge remote-tracking branch 'remotes/stsquad/tags/pull-mttcg-240217-1' into staging
This is the MTTCG pull-request as posted yesterday.

# gpg: Signature made Fri 24 Feb 2017 11:17:51 GMT
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-mttcg-240217-1: (24 commits)
  tcg: enable MTTCG by default for ARM on x86 hosts
  hw/misc/imx6_src: defer clearing of SRC_SCR reset bits
  target-arm: ensure all cross vCPUs TLB flushes complete
  target-arm: don't generate WFE/YIELD calls for MTTCG
  target-arm/powerctl: defer cpu reset work to CPU context
  cputlb: introduce tlb_flush_*_all_cpus[_synced]
  cputlb: atomically update tlb fields used by tlb_reset_dirty
  cputlb: add tlb_flush_by_mmuidx async routines
  cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap
  cputlb: introduce tlb_flush_* async work.
  cputlb: tweak qemu_ram_addr_from_host_nofail reporting
  cputlb: add assert_cpu_is_self checks
  tcg: handle EXCP_ATOMIC exception for system emulation
  tcg: enable thread-per-vCPU
  tcg: enable tb_lock() for SoftMMU
  tcg: remove global exit_request
  tcg: drop global lock during TCG code execution
  tcg: rename tcg_current_cpu to tcg_current_rr_cpu
  tcg: add kick timer for single-threaded vCPU emulation
  tcg: add options for enabling MTTCG
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 18:43:52 +00:00
Peter Maydell
2421f381dc Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20170224' into staging
A selection of s390x patches:
- cleanups, fixes and improvements
- program check loop detection (useful with the corresponding kernel
  patch)
- wire up virtio-crypto for ccw
- and finally support many virtqueues for virtio-ccw

# gpg: Signature made Fri 24 Feb 2017 09:19:19 GMT
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20170224:
  s390x/css: handle format-0 TIC CCW correctly
  s390x/arch_dump: pass cpuid into notes sections
  s390x/arch_dump: use proper note name and note size
  virtio-ccw: support VIRTIO_QUEUE_MAX virtqueues
  s390x: bump ADAPTER_ROUTES_MAX_GSI
  virtio-ccw: check flic->adapter_routes_max_batch
  s390x: add property adapter_routes_max_batch
  virtio-ccw: Check the number of vqs in CCW_CMD_SET_IND
  virtio-ccw: add virtio-crypto-ccw device
  virtio-ccw: handle virtio 1 only devices
  s390x/flic: fail migration on source already
  s390x/kvm: detect some program check loops
  s390x/s390-virtio: get rid of DPRINTF

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 17:48:49 +00:00
Peter Maydell
f62ab6bb8f Merge remote-tracking branch 'remotes/famz/tags/for-upstream' into staging
Docker testing and shippable patches

Hi Peter,

These are testing and build automation patches:

- Shippable.com powered CI config
- Docker cross build
- Fixes and MAINTAINERS tweaks.

# gpg: Signature made Fri 24 Feb 2017 06:31:10 GMT
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/for-upstream:
  docker: Install python2 explicitly in docker image
  MAINTAINERS: merge Build and test automation with Docker tests
  .shippable.yml: new CI provider
  new: debian docker targets for cross-compiling
  tests/docker: add basic user mapping support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 16:37:32 +00:00
Peter Maydell
d7941f4eed Merge remote-tracking branch 'remotes/armbru/tags/pull-util-2017-02-23' into staging
option cutils: Fix and clean up number conversions

# gpg: Signature made Thu 23 Feb 2017 19:41:17 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-util-2017-02-23: (24 commits)
  option: Fix checking of sizes for overflow and trailing crap
  util/cutils: Change qemu_strtosz*() from int64_t to uint64_t
  util/cutils: Return qemu_strtosz*() error and value separately
  util/cutils: Let qemu_strtosz*() optionally reject trailing crap
  qemu-img: Wrap cvtnum() around qemu_strtosz()
  test-cutils: Drop suffix from test_qemu_strtosz_simple()
  test-cutils: Use qemu_strtosz() more often
  util/cutils: Drop QEMU_STRTOSZ_DEFSUFFIX_* macros
  util/cutils: New qemu_strtosz()
  util/cutils: Rename qemu_strtosz() to qemu_strtosz_MiB()
  util/cutils: New qemu_strtosz_metric()
  test-cutils: Cover qemu_strtosz() around range limits
  test-cutils: Cover qemu_strtosz() with trailing crap
  test-cutils: Cover qemu_strtosz() invalid input
  test-cutils: Add missing qemu_strtosz()... endptr checks
  option: Fix to reject invalid and overflowing numbers
  util/cutils: Clean up control flow around qemu_strtol() a bit
  util/cutils: Clean up variable names around qemu_strtol()
  util/cutils: Rename qemu_strtoll(), qemu_strtoull()
  util/cutils: Rewrite documentation of qemu_strtol() & friends
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 18:34:27 +00:00
tianqing
1d393bdeae RBD: Add support readv,writev for rbd
RBD can do readv and writev directly, so we do not need to transform
iov to buf or vice versa any more.

Signed-off-by: tianqing <tianqing@unitedstack.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:43:01 -05:00
Peter Lieven
ef503a8417 block/nfs: try to avoid the bounce buffer in pwritev
If the passed qiov contains exactly one iov, we can
pass the buffer directly.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-3-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:38:35 -05:00
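
The idea, as a generic standalone sketch (write_iov_sketch and do_write are illustrative names, not the driver's functions):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/uio.h>

    static ssize_t do_write(const void *buf, size_t len)
    {
        return (ssize_t)fwrite(buf, 1, len, stdout);
    }

    static ssize_t write_iov_sketch(const struct iovec *iov, int niov)
    {
        if (niov == 1) {
            /* Exactly one element: hand its buffer over directly. */
            return do_write(iov[0].iov_base, iov[0].iov_len);
        }

        /* Otherwise flatten the elements into a temporary bounce buffer. */
        size_t total = 0, off = 0;
        for (int i = 0; i < niov; i++) {
            total += iov[i].iov_len;
        }
        char *bounce = malloc(total);
        if (!bounce) {
            return -1;
        }
        for (int i = 0; i < niov; i++) {
            memcpy(bounce + off, iov[i].iov_base, iov[i].iov_len);
            off += iov[i].iov_len;
        }
        ssize_t ret = do_write(bounce, total);
        free(bounce);
        return ret;
    }

    int main(void)
    {
        struct iovec one = { "single\n", 7 };
        return write_iov_sketch(&one, 1) == 7 ? 0 : 1;
    }
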
Peter Lieven
69785a229d block/nfs: convert to preadv / pwritev
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-2-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:38:35 -05:00
Peter Maydell
6959e4523e Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170223.0' into staging
VFIO updates 2017-02-23

 - Report qdev_unplug errors (Alex Williamson)
 - Fix ecap ID 0 handling, improve comment (Alex Williamson)
 - Disable IGD stolen memory in UPT mode too (Xiong Zhang)

# gpg: Signature made Thu 23 Feb 2017 19:04:17 GMT
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-updates-20170223.0:
  vfio/pci-quirks.c: Disable stolen memory for igd VFIO
  vfio/pci: Improve extended capability comments, skip masked caps
  vfio/pci: Report errors from qdev_unplug() via device request

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 17:27:59 +00:00
Kevin Wolf
d185cf0ec6 tests: Use opened block node for block job tests
blk_insert_bs() and block job related functions will soon require an
opened block node (permission calculations will involve the block
driver), so let our tests be consistent with the real users in this
respect.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
a8a4d15c1c vvfat: Use opened node as backing file
We should not try to assign a not yet opened node as the backing file,
because as soon as the permission system is added it will fail.  The
just added bdrv_new_open_driver() function is the right tool to open a
file with an internal driver, use it.

In case anyone wonders whether that magic fake backing file to trigger a
special action on 'commit' actually works today: No, not for me. One
reason is that we've been adding a raw format driver on top for several
years now and raw doesn't support commit. Other reasons include that the
backing file isn't writable and the driver doesn't support reopen, and
it's also size 0 and the driver doesn't support bdrv_truncate. All of
these are easily fixable, but then 'commit' ended up in an infinite loop
deep in the vvfat code for me, so I thought I'd best leave it alone. I'm
not really sure what it was supposed to do anyway.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
680c7f9606 block: Add bdrv_new_open_driver()
This function allows creating more or less normal BlockDriverStates
even for BlockDrivers that aren't globally registered (e.g. helper
filters for block jobs).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
01a5650179 block: Factor out bdrv_open_driver()
This is a function that doesn't do any option parsing, but just does
some basic BlockDriverState setup and calls the .bdrv_open() function of
the block driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
5696c6e350 block: Use BlockBackend for image probing
This fixes the use of a parent-less BdrvChild in bdrv_open_inherit() by
converting it into a BlockBackend, which is exactly what it should be:
image probing is an external, standalone user of a node. The requests
can't be considered to originate from the format driver node because
that one isn't even opened yet.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
2d6b86af14 block: Factor out bdrv_open_child_bs()
This is the part of bdrv_open_child() that opens a BDS with option
inheritance, but doesn't attach it as a child to the parent yet.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
4e4bf5c42c block: Attach bs->file only during .bdrv_open()
The way that attaching bs->file worked was a bit unusual in that it was
the only child that would be attached to a node which is not opened yet.
Because of this, the block layer couldn't know yet which permissions the
driver would eventually need.

This patch moves the point where bs->file is attached to the beginning
of the individual .bdrv_open() implementations, so drivers already know
what they are going to do with the child. This is also more consistent
with how driver-specific children work.

For a moment, bdrv_open() gets its own BdrvChild to perform image
probing, but instead of directly assigning this BdrvChild to the BDS, it
becomes a temporary one and the node name is passed as an option to the
drivers, so that they can simply use bdrv_open_child() to create another
reference for their own use.

This duplicated child for (the not opened yet) bs is not the final
state, a follow-up patch will change the image probing code to use a
BlockBackend, which is completely independent of bs.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
52cdbc5869 block: Pass BdrvChild to bdrv_truncate()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
becc347e1c mirror: Resize active commit base in mirror_run()
This is more consistent with the commit block job, and it moves the code
to a place where we already have the necessary BlockBackends to resize
the base image when bdrv_truncate() is changed to require a BdrvChild.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
70b27f3643 qcow2: Use BB for resizing in qcow2_amend_options()
In order to be able to convert bdrv_truncate() to take a BdrvChild and
later to correctly check the resize permission here, we need to use a
BlockBackend for resizing the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
7dad9ee646 blockdev: Use BlockBackend to resize in qmp_block_resize()
In order to be able to do permission checking and to keep working with
the BdrvChild based bdrv_truncate() that this involves, we need to
create a temporary BlockBackend to resize the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
John Snow
2c3b44da07 iotests: Fix another race in 030
We can't rely on a non-paused job to be present and running for us.
Assume that if the job is not present that it completed already.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
c6ccc2c5e6 qemu-img: Improve documentation for PREALLOC_MODE_FALLOC
Now that we are truncating the file in both PREALLOC_MODE_FULL and
PREALLOC_MODE_OFF, not truncating in PREALLOC_MODE_FALLOC looks odd.
Add a comment explaining why we do not truncate in this case.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
5a1dad9d5a qemu-img: Truncate before full preallocation
In a previous commit (qemu-img: Do not truncate before preallocation) we
moved truncate to the PREALLOC_MODE_OFF branch to avoid slowdown in
posix_fallocate().

However, this change is not optimal when using PREALLOC_MODE_FULL, since
knowing the final size from the beginning could allow the file system
driver to do fewer allocations and possibly avoid fragmentation of the
file.

Now we also truncate before doing full preallocation.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
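
A plain-POSIX sketch of the ordering this commit settles on for full preallocation (illustrative only, not qemu-img's code):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        const off_t size = 1024 * 1024;
        char buf[4096];
        int fd = open("prealloc-demo.img", O_CREAT | O_RDWR | O_TRUNC, 0644);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* Announce the final size first so the filesystem can plan its
         * allocations ... */
        if (ftruncate(fd, size) < 0) {
            perror("ftruncate");
            return 1;
        }
        /* ... then write the data; "full" preallocation writes real zeroes. */
        memset(buf, 0, sizeof(buf));
        for (off_t off = 0; off < size; off += sizeof(buf)) {
            if (pwrite(fd, buf, sizeof(buf), off) != sizeof(buf)) {
                perror("pwrite");
                return 1;
            }
        }
        return close(fd);
    }
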
Nir Soffer
6f993f3fca qemu-img: Add tests for raw image preallocation
Add tests for creating raw image with and without the preallocation
option.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Nir Soffer
f6a7240442 qemu-img: Do not truncate before preallocation
When using a file system that does not support fallocate() (e.g. NFS <
4.2), truncating the file only when preallocation=OFF speeds up creating
a raw file.

Here is an example run, tested on a Fedora 24 machine, creating a raw file
on an NFS version 3 server.

$ time ./qemu-img-master create -f raw -o preallocation=falloc mnt/test 1g
Formatting 'mnt/test', fmt=raw size=1073741824 preallocation=falloc

real	0m21.185s
user	0m0.022s
sys	0m0.574s

$ time ./qemu-img-fix create -f raw -o preallocation=falloc mnt/test 1g
Formatting 'mnt/test', fmt=raw size=1073741824 preallocation=falloc

real	0m11.601s
user	0m0.016s
sys	0m0.525s

$ time dd if=/dev/zero of=mnt/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 15.6627 s, 68.6 MB/s

real	0m16.104s
user	0m0.009s
sys	0m0.220s

Running with strace we can see that without this change we do one
pread() and one pwrite() for each block. With this change, we do only
one pwrite() per block.

$ strace ./qemu-img-master create -f raw -o preallocation=falloc mnt/test 8192
...
pread64(9, "\0", 1, 4095)               = 1
pwrite64(9, "\0", 1, 4095)              = 1
pread64(9, "\0", 1, 8191)               = 1
pwrite64(9, "\0", 1, 8191)              = 1

$ strace ./qemu-img-fix create -f raw -o preallocation=falloc mnt/test 8192
...
pwrite64(9, "\0", 1, 4095)              = 1
pwrite64(9, "\0", 1, 8191)              = 1

This happens because posix_fallocate is checking if each block is
allocated before writing a byte to the block, and when truncating the
file before preallocation, all blocks are unallocated.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
43421ea05f qemu-iotests: redirect nbd server stdout to /dev/null
Some iotests (e.g. 174) try to filter the output of _make_test_image by
piping the stdout.  Pipe the server's stdout to /dev/null, so that the
filter pipe does not need to wait for process completion.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
dfac03dcd4 qemu-iotests: add ability to exclude certain protocols from tests
Add the ability for shell script tests to exclude specific
protocols.  This is useful to allow all protocols except ones known
not to support a feature used in the test (e.g. .bdrv_create).

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
2b12baf0e3 qemu-iotests: Test 137 only supports 'file' protocol
Since test 137 makes use of qcow2.py, only local files are supported.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Peter Maydell
fe8ee082db Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-22' into staging
QAPI patches for 2017-02-22

# gpg: Signature made Wed 22 Feb 2017 19:12:27 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-22:
  block: Don't bother asserting type of output visitor's output
  monitor: Clean up handle_hmp_command() a bit
  tests: Don't check qobject_type() before qobject_to_qbool()
  tests: Don't check qobject_type() before qobject_to_qfloat()
  tests: Don't check qobject_type() before qobject_to_qint()
  tests: Don't check qobject_type() before qobject_to_qstring()
  tests: Don't check qobject_type() before qobject_to_qlist()
  Don't check qobject_type() before qobject_to_qdict()
  test-qmp-event: Simplify and tighten event_test_emit()
  libqtest: Clean up qmp_response() a bit
  check-qjson: Simplify around compare_litqobj_to_qobj()
  check-qdict: Tighten qdict_crumple_test_recursive() some
  check-qdict: Simplify qdict_crumple_test_recursive()
  qdict: Make qdict_get_qlist() safe like qdict_get_qdict()
  net: Flatten simple union NetLegacyOptions
  numa: Flatten simple union NumaOptions

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 15:00:51 +00:00
Peter Maydell
63f495beb4 Merge remote-tracking branch 'remotes/kraxel/tags/pull-cve-2017-2620-20170224-1' into staging
cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)

# gpg: Signature made Fri 24 Feb 2017 13:42:39 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-cve-2017-2620-20170224-1:
  cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 13:55:26 +00:00
Gerd Hoffmann
92f2b88cea cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)
CIRRUS_BLTMODE_MEMSYSSRC blits do NOT check blit destination
and blit width, at all.  Oops.  Fix it.

Security impact: high.

The missing blit destination check allows writing to host memory.
Basically same as CVE-2014-8106 for the other blit variants.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-24 14:35:50 +01:00
Peter Maydell
5842b55fd4 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170223-1' into staging
usb: ohci bugfix, switch core to unrealize, xhci property cleanup

# gpg: Signature made Thu 23 Feb 2017 15:37:57 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170223-1:
  xhci: properties cleanup
  usb: ohci: fix error return code in servicing td
  usb: replace handle_destroy with unrealize

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 12:49:04 +00:00
Paul Burton
df1d8a1f29 hw/mips: MIPS Boston board support
Introduce support for emulating the MIPS Boston development board. The
Boston board is built around an FPGA & 3 PCIe controllers, one of which
is connected to an Intel EG20T Platform Controller Hub. It is used
during the development & debug of new CPUs and the software intended to
run on them, and is essentially the successor to the older MIPS Malta
board.

This patch does not implement the EG20T, instead connecting an already
supported ICH-9 AHCI controller. Whilst this isn't accurate it's enough
for typical stock Boston software (eg. Linux kernels) to work with hard
disks given that both the ICH-9 & EG20T implement the AHCI
specification.

Boston boards typically boot kernels in the FIT image format, and this
patch will treat kernels provided to QEMU as such. When loading a kernel
directly, the board code will generate minimal firmware much as the
Malta board code does. This firmware will set up the CM, CPC & GIC
register base addresses then set argument registers & jump to the kernel
entry point. Alternatively, bootloader code may be loaded using the bios
argument in which case no firmware will be generated & execution will
proceed from the start of the boot code at the default MIPS boot
exception vector (offset 0x1fc00000 into (c)kseg1).

Currently real Boston boards are always used with FPGA bitfiles that
include a Global Interrupt Controller (GIC), so the interrupt
configuration is only defined for such cases. Therefore the board will
only allow use of CPUs which implement the CPS components, including the
GIC, and will otherwise exit with a message.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
[yongbok.kim@imgtec.com:
  isolated boston machine support for mips64el.
  updated for recent Chardev changes.
  ignore missing bios/kernel for qtest.
  added default -drive to if=ide explicitly.
  changed default memory size into 1G due to make check failure
  on 32-bit hosts]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-24 10:37:21 +00:00
Alex Bennée
ca759f9e38 tcg: enable MTTCG by default for ARM on x86 hosts
This enables the multi-threaded system emulation by default for ARMv7
and ARMv8 guests using the x86_64 TCG backend. This is because on the
guest side:

  - The ARM translate.c/translate-64.c have been converted to
    - use MTTCG safe atomic primitives
    - emit the appropriate barrier ops
  - The ARM machine has been updated to
    - hold the BQL when modifying shared cross-vCPU state
    - defer powerctl changes to async safe work

All the host backends support the barrier and atomic primitives but
need to provide same-or-better support for normal load/store
operations.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Acked-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:46 +00:00
Alex Bennée
4881658a4b hw/misc/imx6_src: defer clearing of SRC_SCR reset bits
The arm_reset_cpu/set_cpu_on/set_cpu_off() functions do their work
asynchronously in the target vCPUs context. As a result we need to
ensure the SRC_SCR reset bits correctly report the reset status at the
right time. To do this we defer the clearing of the bit with an async
job which will run after the work queued by ARM powerctl functions.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
a67cf27727 target-arm: ensure all cross vCPUs TLB flushes complete
Previously, flushes on other vCPUs would only get serviced when they
exited their TranslationBlocks. While this isn't overly problematic, it
violates the semantics of TLB flush from the point of view of the source
vCPU.

To solve this we call the cputlb *_all_cpus_synced() functions to do
the flushes which ensures all flushes are completed by the time the
vCPU next schedules its own work. As the TLB instructions are modelled
as CP writes the TB ends at this point meaning cpu->exit_request will
be checked before the next instruction is executed.

Deferring the work until the architectural sync point is a possible
future optimisation.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
c22edfebff target-arm: don't generate WFE/YIELD calls for MTTCG
The WFE and YIELD instructions are really only hints and in TCG's case
they were useful to move the scheduling on from one vCPU to the next. In
the parallel context (MTTCG) this just causes an unnecessary cpu_exit
and contention of the BQL.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
062ba099e0 target-arm/powerctl: defer cpu reset work to CPU context
When switching a new vCPU on we want to complete a bunch of the setup
work before we start scheduling the vCPU thread. To do this cleanly we
defer vCPU setup to async work which will run the vCPUs execution
context as the thread is woken up. The scheduling of the work will kick
the vCPU awake.

This avoids potential races in MTTCG system emulation.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
c3b9a07a33 cputlb: introduce tlb_flush_*_all_cpus[_synced]
This introduces support in the cputlb API for flushing all CPUs' TLBs
with one call. This avoids the need for target helpers to iterate
through the vCPUs themselves.

An additional variant of the API (_synced) will cause the source vCPUs
work to be scheduled as "safe work". The result will be all the flush
operations will be complete by the time the originating vCPU executes
its safe work. The calling implementation can either end the TB
straight away (which will then pick up the cpu->exit_request on
entering the next block) or defer the exit until the architectural
sync point (usually a barrier instruction).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
b0706b7167 cputlb: atomically update tlb fields used by tlb_reset_dirty
The main use case for tlb_reset_dirty is to set the TLB_NOTDIRTY flags
in TLB entries to force the slow-path on writes. This is used to mark
page ranges containing code which has been translated so it can be
invalidated if written to. To do this safely we need to ensure the TLB
entries in question for all vCPUs are updated before we attempt to run
the code otherwise a race could be introduced.

To achieve this we atomically set the flag in tlb_reset_dirty_range and
take care when setting it when the TLB entry is filled.

On 32 bit systems attempting to emulate 64 bit guests we don't even
bother as we might not have the atomic primitives available. MTTCG is
disabled in this case and can't be forced on. The copy_tlb_helper
function helps keep the atomic semantics in one place to avoid
confusion.

The dirty helper function is made static as it isn't used outside of
cputlb.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
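
A generic illustration of the "set the flag atomically" part, using C11 atomics and a made-up flag name (nothing here is QEMU's cputlb code):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FLAG_NOTDIRTY (1u << 3)    /* hypothetical TLB_NOTDIRTY-style bit */

    static _Atomic uint32_t tlb_addr_write;

    /* A plain "tlb_addr_write |= FLAG_NOTDIRTY" is a read-modify-write
     * that can race with another thread refilling the same entry; an
     * atomic fetch_or cannot lose either update. */
    static void mark_notdirty(void)
    {
        atomic_fetch_or_explicit(&tlb_addr_write, FLAG_NOTDIRTY,
                                 memory_order_relaxed);
    }

    int main(void)
    {
        mark_notdirty();
        printf("flags: 0x%x\n", (unsigned)atomic_load(&tlb_addr_write));
        return 0;
    }
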
Alex Bennée
e72184455c cputlb: add tlb_flush_by_mmuidx async routines
This converts the remaining TLB flush routines to use async work when
detecting a cross-vCPU flush. The only minor complication is having to
serialise the var_list of MMU indexes into a form that can be punted
to an asynchronous job.

The pending_tlb_flush field on QOM's CPU structure also becomes a
bitfield rather than a boolean.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
0336cbf853 cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap
While the varargs approach was flexible, the original MTTCG ended up
having to munge the bits into a bitmap so the data could be used in
deferred work helpers. Instead of hiding that in cputlb we push the
change to the API to make it take a bitmap of MMU indexes instead.

For ARM, some of the resulting flushes end up being quite long, so to aid
readability I've tended to move the index shifting to a new line so
all the bits being OR-ed together line up nicely, for example:

    tlb_flush_page_by_mmuidx(other_cs, pageaddr,
                             (1 << ARMMMUIdx_S1SE1) |
                             (1 << ARMMMUIdx_S1SE0));

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
[AT: SPARC parts only]
Reviewed-by: Artyom Tarasenko <atar4qemu@gmail.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
[PM: ARM parts only]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
KONRAD Frederic
e3b9ca8109 cputlb: introduce tlb_flush_* async work.
Some architectures allow flushing the TLB of other vCPUs. This is not a
problem when we have only one thread for all vCPUs, but it definitely
needs to be asynchronous work when we are truly multithreaded.

We take the tb_lock() when doing this to avoid racing with other threads
which may be invalidating TB's at the same time. The alternative would
be to use proper atomic primitives to clear the tlb entries en-mass.

This patch doesn't do anything to protect other cputlb functions being
called in MTTCG mode that make cross-vCPU changes.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[AJB: remove need for g_malloc on defer, make check fixes, tb_lock]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
857baec1d9 cputlb: tweak qemu_ram_addr_from_host_nofail reporting
This moves the helper function closer to where it is called and updates
the error message to report via error_report instead of the deprecated
fprintf.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
f0aff0f124 cputlb: add assert_cpu_is_self checks
For SoftMMU the TLB flushes are an example of a task that can be
triggered on one vCPU by another. To deal with this properly we need to
use safe work to ensure these changes are done safely. The new assert
can be enabled while debugging to catch these cases.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Pranith Kumar
08e73c48b0 tcg: handle EXCP_ATOMIC exception for system emulation
The patch enables handling atomic code in the guest. This should
preferably be done in cpu_handle_exception(), but the current assumptions
regarding when we can execute atomic sections cause a deadlock.

The current mechanism discards the flags which were set in atomic
execution. We ensure they are properly saved by calling the
cc->cpu_exec_enter/leave() functions around the loop.

As we are running cpu_exec_step_atomic() from the outermost loop we
need to avoid an abort() when single stepping over atomic code since
debug exception longjmp will point to the setlongjmp in
cpu_exec(). We do this by setting a new jmp_env so that it jumps back
here on an exception.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
[AJB: tweak title, merge with new patches, add mmap_lock]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
CC: Paolo Bonzini <pbonzini@redhat.com>
2017-02-24 10:32:45 +00:00
Alex Bennée
372579427a tcg: enable thread-per-vCPU
There are a couple of changes that occur at the same time here:

  - introduce a single vCPU qemu_tcg_cpu_thread_fn

  One of these is spawned per vCPU with its own Thread and Condition
  variables. qemu_tcg_rr_cpu_thread_fn is the new name for the old
  single threaded function.

  - the TLS current_cpu variable is now live for the lifetime of MTTCG
    vCPU threads. This is for future work where async jobs need to know
    the vCPU context they are operating in.

The user can now switch on multi-thread behaviour and spawn a thread
per vCPU. For a simple kvm-unit-test like:

  ./arm/run ./arm/locking-test.flat -smp 4 -accel tcg,thread=multi

Will now use 4 vCPU threads and have an expected FAIL (instead of the
unexpected PASS) as the default mode of the test has no protection when
incrementing a shared variable.

We enable the parallel_cpus flag to ensure we generate correct barrier
and atomic code if supported by the front and backends. This doesn't
automatically enable MTTCG until default_mttcg_enabled() is updated to
check the configuration is supported.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[AJB: Some fixes, conditionally, commit rewording]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
2f16960660 tcg: enable tb_lock() for SoftMMU
tb_lock() has long been used for linux-user mode to protect code
generation. By enabling it now we prepare for MTTCG and ensure all code
generation is serialised by this lock. The other major structure that
needs protecting is the l1_map and its PageDesc structures. For the
SoftMMU case we also use tb_lock() to protect these structures instead
of linux-user mmap_lock() which as the name suggests serialises updates
to the structure as a result of guest mmap operations.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
e5143e30fb tcg: remove global exit_request
There are now only two uses of the global exit_request left.

The first ensures we exit the run_loop when we first start to process
pending work and in the kick handler. This is just as easily done by
setting the first_cpu->exit_request flag.

The second use is in the round robin kick routine. The global
exit_request ensured every vCPU would set its local exit_request and
cause a full exit of the loop. Now the iothread isn't being held while
running we can just rely on the kick handler to push us out as intended.

We lightly re-factor the main vCPU thread to ensure cpu->exit_requests
cause us to exit the main loop and process any IO requests that might
come along. As a cpu->exit_request may legitimately get squashed
while processing the EXCP_INTERRUPT exception we also check
cpu->queued_work_first to ensure queued work is expedited as soon as
possible.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Jan Kiszka
8d04fb55de tcg: drop global lock during TCG code execution
This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.

We have to revert a few optimizations for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.

Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here.
These numbers demonstrate where we gain something:

20338 jan       20   0  331m  75m 6904 R   99  0.9   0:50.95 qemu-system-arm
20337 jan       20   0  331m  75m 6904 S   20  0.9   0:26.50 qemu-system-arm

The guest CPU was fully loaded, but the iothread could still run mostly
independently on a second core. Without the patch we don't get beyond

32206 jan       20   0  330m  73m 7036 R   82  0.9   1:06.00 qemu-system-arm
32204 jan       20   0  330m  73m 7036 S   21  0.9   0:17.03 qemu-system-arm

We don't benefit significantly, though, when the guest is not fully
loading a host CPU.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com>
[FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex]
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[EGC: fixed iothread lock for cpu-exec IRQ handling]
Signed-off-by: Emilio G. Cota <cota@braap.org>
[AJB: -smp single-threaded fix, clean commit msg, BQL fixes]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
[PM: target-arm changes]
Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:45 +00:00
Alex Bennée
791158d93b tcg: rename tcg_current_cpu to tcg_current_rr_cpu
...and make the definition local to cpus. In preparation for MTTCG the
concept of a global tcg_current_cpu will no longer make sense. However
we still need to keep track of it in the single-threaded case to be able
to exit quickly when required.

qemu_cpu_kick_no_halt() moves and becomes qemu_cpu_kick_rr_cpu() to
emphasise its use-case. qemu_cpu_kick now kicks the relevant cpu as
well as qemu_kick_rr_cpu() which will become a no-op in MTTCG.

For the time being the setting of the global exit_request remains.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:45 +00:00
Alex Bennée
6546706d28 tcg: add kick timer for single-threaded vCPU emulation
Currently we rely on the side effect of the main loop grabbing the
iothread_mutex to give any long running basic block chains a kick to
ensure the next vCPU is scheduled. As this code is being re-factored and
rationalised we now do it explicitly here.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:45 +00:00
KONRAD Frederic
8d4e9146b3 tcg: add options for enabling MTTCG
We know there will be cases where MTTCG won't work until additional work
is done in the front/back ends to support it. It will however be useful to
be able to turn it on.

As a result MTTCG will default to off unless the combination is
supported. However the user can turn it on for the sake of testing.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[AJB: move to -accel tcg,thread=multi|single, defaults]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
2093714314 tcg: move TCG_MO/BAR types into own file
We'll be using the memory ordering definitions to define values for
both the host and guest. To avoid fighting with circular header
dependencies just move these types into their own minimal header.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Pranith Kumar
4ec667042d mttcg: Add missing tb_lock/unlock() in cpu_exec_step()
The recent patch enabling lock assertions uncovered the missing lock
acquisition in cpu_exec_step(). This patch adds them.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Pranith Kumar
6ac3d7e845 mttcg: translate-all: Enable locking debug in a debug build
Enable tcg lock debug asserts in a debug build by default instead of
relying on DEBUG_LOCKING. None of the other DEBUG_* macros have
asserts, so this patch removes DEBUG_LOCKING and enables these asserts
in a debug build.

CC: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
[AJB: tweak ifdefs so can be early in series]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
c6489dd921 docs: new design document multi-thread-tcg.txt
This documents the current design for upgrading TCG emulation to take
advantage of modern CPUs by running a thread-per-CPU. The document goes
through the various areas of the code affected by such a change and
proposes design requirements for each part of the solution.

The text marked with (Current solution[s]) documents what the current
approaches being used are.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Peter Maydell
5522924718 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170222' into staging
ppc patch queue for 2017-02-22

This pull request has:
   * Yet more POWER9 instruction implementations
   * Some extensions to the softfloat code which are necessary for
     some of those instructions
   * Some preliminary patches in preparation for POWER9 softmmu
     implementation
   * Igor Mammedov's cleanups to unify hotplug cpu handling across
     architectures
   * Assorted bugfixes

The softfloat and cpu hotplug changes aren't entirely ppc specific (in
fact the hotplug stuff contains some pc specific patches).  However
they're included here because ppc is one of the main beneficiaries,
and the series depend on some ppc specific patches.

# gpg: Signature made Wed 22 Feb 2017 06:29:47 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170222: (43 commits)
  hw/ppc/ppc405_uc.c: Avoid integer overflows
  hw/ppc/spapr: Check for valid page size when hot plugging memory
  target-ppc: fix Book-E TLB matching
  hw/net/spapr_llan: 6 byte mac address device tree entry
  machine: replace query_hotpluggable_cpus() callback with has_hotpluggable_cpus flag
  machine: unify [pc_|spapr_]query_hotpluggable_cpus() callbacks
  spapr: reuse machine->possible_cpus instead of cores[]
  change CPUArchId.cpu type to Object*
  pc: pass apic_id to pc_find_cpu_slot() directly so lookup could be done without CPU object
  pc: calculate topology only once when possible_cpus is initialised
  pc: move pcms->possible_cpus init out of pc_cpus_init()
  machine: move possible_cpus to MachineState
  hw/pci-host/prep: Do not use hw_error() in realize function
  target/ppc/POWER9: Direct all instr and data storage interrupts to the hypv
  target/ppc/POWER9: Adapt LPCR handling for POWER9
  target/ppc/POWER9: Add ISAv3.00 MMU definition
  target/ppc: Fix LPCR DPFD mask define
  target-ppc: Add xscvqpudz and xscvqpuwz instructions
  target-ppc: Implement round to odd variants of quad FP instructions
  softfloat: Add float128_to_uint32_round_to_zero()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:13:57 +00:00
Dong Jia Shi
9f94f84ce7 s390x/css: handle format-0 TIC CCW correctly
For TIC CCW, bit positions 8-32 of the format-1 CCW must contain zeros;
otherwise, a program-check condition is generated. For format-0 TIC CCWs,
bits 32-63 are ignored.

To convert TIC from format-0 CCW to format-1 CCW correctly, let's clear
bits 8-32 to guarantee compatibility.

Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
f738f296ea s390x/arch_dump: pass cpuid into notes sections
we need to pass the cpuid into the pid field of the notes
section; otherwise the notes for different CPUs all have 0:

e.g. objdump -h shows:
old:
  5 .reg-s390-prefix/0 00000004  0000000000000000  0000000000000000
  6 .reg-s390-prefix 00000004  0000000000000000  0000000000000000
 21 .reg-s390-prefix/0 00000004  0000000000000000  0000000000000000
new:
  5 .reg-s390-prefix/1 00000004  0000000000000000  0000000000000000
  6 .reg-s390-prefix 00000004  0000000000000000  0000000000000000
 21 .reg-s390-prefix/2 00000004  0000000000000000  0000000000000000

Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
5f706fdc16 s390x/arch_dump: use proper note name and note size
In binutils/libbfd (bfd/elf.c) it is enforced that all s390
specific ELF notes like e.g. NT_S390_PREFIX or NT_S390_CTRS
have "LINUX" specified as note name and that the namesz is
6. Otherwise the notes are ignored.

QEMU currently uses "CORE" for these notes. Up to now this has
not been a real problem because the dump analysis tool "crash"
does handle that. But it will break all programs that use libbfd
for processing ELF notes.

So fix this and use "LINUX" for all s390 specific notes to comply
with libbfd. Also set the correct namesz.

Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
b1914b824a virtio-ccw: support VIRTIO_QUEUE_MAX virtqueues
The maximal number of virtqueues per device can be limited on a per
transport basis. For virtio-ccw this limit is defined by
VIRTIO_CCW_QUEUE_MAX, however the limitation used to come from the
number of adapter routes supported by flic (via notifiers).

Recently the limitation of the flic was adjusted so that it can
accommodate VIRTIO_QUEUE_MAX queues, and it is meanwhile checked for
separately too.

Let us remove the transport specific limitation of virtio-ccw by
dropping VIRTIO_CCW_QUEUE_MAX and using VIRTIO_QUEUE_MAX instead.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
069097dad3 s390x: bump ADAPTER_ROUTES_MAX_GSI
Let's increase ADAPTER_ROUTES_MAX_GSI to VIRTIO_QUEUE_MAX which is the
largest demand foreseeable at the moment. Let us add a compatibility
macro for the previous machines so client code can maintain backwards
migration compatibility.

To not mess up migration compatibility for virtio-ccw
VIRTIO_CCW_QUEUE_MAX is left at its current value, and will be dropped
when virtio-ccw is converted to use the capability of the flic
introduced by this patch.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
0708afa704 virtio-ccw: check flic->adapter_routes_max_batch
Currently VIRTIO_CCW_QUEUE_MAX is defined as ADAPTER_ROUTES_MAX_GSI.
That is, when checking queue max we implicitly check the constraint
concerning the number of adapter routes. This won't be satisfactory any
more (due to backward migration considerations) if ADAPTER_ROUTES_MAX_GSI
changes (ADAPTER_ROUTES_MAX_GSI is going to change because we want to
support up to VIRTIO_QUEUE_MAX queues per virtio-ccw device).

Let us introduce a check on a recently introduced flic property which
gives us the compatibility machine aware limit on adapter routes.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
e61cc6b5c6 s390x: add property adapter_routes_max_batch
To make virtio-ccw support more than 64 virtqueues we will have to
increase ADAPTER_ROUTES_MAX_GSI, which is currently limiting the number of
possible adapter routes. Of course increasing the number of supported
routes can break backwards migration.

Let us introduce a compatibility property adapter_routes_max_batch so
client code can use the old limit when in compatibility mode and
retain migration compatibility.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
797b608638 virtio-ccw: Check the number of vqs in CCW_CMD_SET_IND
We cannot support more than 64 virtqueues with the 64 bits provided by
classic indicators. If a driver tries to setup classic indicators
(which it is free to do even for virtio-1 devices) for a device with
more than 64 virtqueues, we should reject the attempt so that the
driver does not end up with an unusable device.

This is in preparation for bumping the number of supported virtqueues
on the ccw transport.
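
A rough sketch of the kind of check meant here (illustrative names, not
the actual virtio-ccw code):

    #include <errno.h>

    #define CLASSIC_INDICATOR_BITS 64   /* one bit per virtqueue */

    /* Reject classic indicator setup for devices with more than 64 queues. */
    static int check_classic_indicators(unsigned int num_queues)
    {
        return num_queues > CLASSIC_INDICATOR_BITS ? -ENOSYS : 0;
    }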

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
d2256070d2 virtio-ccw: add virtio-crypto-ccw device
Wire up virtio-crypto for the CCW based VIRTIO.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
47e13dfd86 virtio-ccw: handle virtio 1 only devices
As a preparation for wiring-up virtio-crypto, the first non-transitional
virtio device on the ccw transport, let us introduce a mechanism for
disabling revision 0.  This is more or less equivalent to disabling
legacy, as revision 0 is legacy only and legacy drivers use revision
0 exclusively.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Cornelia Huck
ba690c7171 s390x/flic: fail migration on source already
Current code puts a 'FLIC_FAILED' marker into the migration stream
to indicate something went wrong while saving flic state and fails
load if it encounters that marker. VMState's put routine recently
gained the ability to return error codes (but this is not wired up
yet).

In order to be able to reap the benefits of returning an error and
failing migration on the source already once this gets wired up
in core, return an error in addition to storing 'FLIC_FAILED'.

Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
409422cd83 s390x/kvm: detect some program check loops
Sometimes (e.g. early boot) a guest is broken in such ways that it loops
100% delivering operation exceptions (illegal operation) but the pgm new
PSW is not set properly. This will result in code being read from
address zero, which usually contains another illegal op. Let's detect
this case and put the guest in crashed state. Instead of only detecting
this for address zero apply a heuristic that will work for any program
check new psw so that it will also reach the crashed state if you
provide some random elf file to the -kernel option.
We do not want guest problem state to be able to trigger a guest panic,
e.g. by faulting on an address that is the same as the program check
new PSW, so we check for the problem state bit being off.

With this we
a: get rid of CPU consumption of such broken guests
b: keep the program old PSW. This makes it possible to find out the original
   illegal operation, making debugging such early boot issues much easier than
   with single stepping

This relies on the kernel using a similar heuristic and passing such
operation exceptions to user space.
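
Roughly, the heuristic boils down to something like the following
(simplified stand-in types and bit position, not the actual s390x KVM code):

    #include <stdbool.h>
    #include <stdint.h>

    #define PSW_MASK_PSTATE (1ULL << 48)    /* problem-state bit (position illustrative) */

    typedef struct Psw {
        uint64_t mask;
        uint64_t addr;
    } Psw;

    /*
     * An operation exception was delivered at old_psw->addr.  If the program
     * check new PSW would send the guest straight back to the same address
     * and the guest was not in problem state, it is looping on illegal
     * operations: treat it as crashed instead of spinning forever.
     */
    static bool pgm_check_loop_detected(const Psw *old_psw, const Psw *pgm_new_psw)
    {
        return old_psw->addr == pgm_new_psw->addr &&
               !(old_psw->mask & PSW_MASK_PSTATE);
    }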

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Cornelia Huck
94b5024b1f s390x/s390-virtio: get rid of DPRINTF
The DPRINTF approach is likely to introduce bitrot, and the preferred
way for debugging is tracing anyway. Fortunately, there are no users
(left), so nuke it.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-02-24 10:15:18 +01:00
Fam Zheng
a8f159d45b docker: Install python2 explicitly in docker image
Python is no longer installed implicitly, but the QEMU build system
requires it. List it in PACKAGES.

Reported-by: Auger Eric <eric.auger@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170222021801.28658-1-famz@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
e70dc7f854 MAINTAINERS: merge Build and test automation with Docker tests
The docker framework is really just another piece in the build
automation puzzle, so let's merge them together. As an added bonus I've also
included the Travis and Patchew status links. The Shippable links will
be added later once mainline tests have been configured and setup.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170220105139.21581-5-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
d92d886a3b .shippable.yml: new CI provider
Ostensibly Shippable offers a similar set of services to Travis.
However, they are focused on Docker-container-based workflows, so we
can use our existing containers to run a few extra builds - in this
case a bunch of cross-compiled targets on a Debian multiarch system.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170220105139.21581-4-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
24e0131f37 new: debian docker targets for cross-compiling
This provides a basic Debian install with access to the emdebian cross
compilers. The debian-armhf-cross and debian-arm64-cross targets build
on the basic Debian image to allow cross compiling to those targets.

A new environment variable (QEMU_CONFIGURE_OPTS) is set as part of the
docker container and passed to the build to specify the
--cross-prefix. The user still calls the build in the usual way, for
example:

  make docker-test-build@debian-arm64-cross \
    TARGET_LIST="aarch64-softmmu,aarch64-linux-user"

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170220105139.21581-3-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
414a8ce57e tests/docker: add basic user mapping support
Currently all docker builds are done by exporting a tarball to the
docker container and running the build as the containers root user.
Other use cases are possible however and it is possible to map a part
of users file-system to the container. This is useful for example for
doing cross-builds of arbitrary source trees. For this to work
smoothly the container needs to have a user created that maps cleanly
to the host system.

This adds a -u option to the docker script so that:

  DEB_ARCH=armhf DEB_TYPE=stable ./tests/docker/docker.py build \
    -u --include-executable=arm-linux-user/qemu-arm \
    debian:armhf ./tests/docker/dockerfiles/debian-bootstrap.docker

Will build a container that can then be run like:

  docker run --rm -it -v /home/alex/lsrc/qemu/risu.git/:/src \
    --user=alex:alex -w /src/ debian:armhf \
    sh -c "make clean && ./configure -s && make"

All docker containers built will add the current user unless
explicitly disabled by specifying NOUSER when invoking the Makefile:

  make docker-image-debian-armhf-cross NOUSER=1

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170220105139.21581-2-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Markus Armbruster
75cdcd1553 option: Fix checking of sizes for overflow and trailing crap
parse_option_size()'s checking for overflow and trailing crap is
wrong.  Has always been that way.  qemu_strtosz() gets it right, so
use that.

This adds support for size suffixes 'P', 'E', and ignores case for all
suffixes, not just 'k'.
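
For illustration, this kind of checking looks roughly like the following
stand-alone sketch (integer-only and simplified; not the real qemu_strtosz()
implementation):

    #include <ctype.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    /* Parse "<number>[BKMGTPE]", base 1024, case-insensitive suffixes. */
    static int parse_size(const char *str, uint64_t *result)
    {
        static const char suffixes[] = "BKMGTPE";
        const char *s;
        char *end;
        uint64_t val, mul = 1;

        if (str[0] == '-') {
            return -EINVAL;                     /* negative sizes make no sense */
        }
        errno = 0;
        val = strtoull(str, &end, 10);
        if (errno || end == str) {
            return errno ? -errno : -EINVAL;    /* out of range or no digits */
        }
        if (*end) {
            s = strchr(suffixes, toupper((unsigned char)*end));
            if (!s) {
                return -EINVAL;                 /* unknown suffix */
            }
            for (int i = 0; i < s - suffixes; i++) {
                mul *= 1024;
            }
            end++;
        }
        if (*end) {
            return -EINVAL;                     /* trailing crap */
        }
        if (mul > 1 && val > UINT64_MAX / mul) {
            return -ERANGE;                     /* overflow on the multiply */
        }
        *result = val * mul;
        return 0;
    }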

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-25-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
f46bfdbfc8 util/cutils: Change qemu_strtosz*() from int64_t to uint64_t
This will permit its use in parse_option_size().

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-24-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
f17fd4fdf0 util/cutils: Return qemu_strtosz*() error and value separately
This makes qemu_strtosz(), qemu_strtosz_mebi() and
qemu_strtosz_metric() similar to qemu_strtoi64(), except negative
values are rejected.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-23-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
4fcdf65ae2 util/cutils: Let qemu_strtosz*() optionally reject trailing crap
Change the qemu_strtosz() & friends to return -EINVAL when @endptr is
null and the conversion doesn't consume the string completely.
Matches how qemu_strtol() & friends work.

Only test_qemu_strtosz_simple() passes a null @endptr.  No functional
change there, because its conversion consumes the string.

Simplify callers that use @endptr only to fail when it doesn't point
to '\0' to pass a null @endptr instead.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-22-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
606caa0a2a qemu-img: Wrap cvtnum() around qemu_strtosz()
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-21-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
dab9cc9237 test-cutils: Drop suffix from test_qemu_strtosz_simple()
Leave testing unit suffixes to test_qemu_strtosz_units().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-20-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
753f8da0e0 test-cutils: Use qemu_strtosz() more often
Use qemu_strtosz() instead of qemu_strtosz_MiB() where it doesn't
really make a difference.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-19-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
17f942560e util/cutils: Drop QEMU_STRTOSZ_DEFSUFFIX_* macros
Writing QEMU_STRTOSZ_DEFSUFFIX_* instead of '*' gains nothing.  Get
rid of these eyesores.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-18-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
466dea14e6 util/cutils: New qemu_strtosz()
Most callers of qemu_strtosz_suffix() pass QEMU_STRTOSZ_DEFSUFFIX_B.
Capture the pattern in new qemu_strtosz().

Inline qemu_strtosz_suffix() into its only remaining caller.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-17-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
e591591b32 util/cutils: Rename qemu_strtosz() to qemu_strtosz_MiB()
With qemu_strtosz(), no suffix means mebibytes.  It's used rarely.
I'm going to add a similar function where no suffix means bytes.
Rename qemu_strtosz() to qemu_strtosz_MiB() to make the name
qemu_strtosz() available for the new function.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-16-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
d2734d2629 util/cutils: New qemu_strtosz_metric()
To parse numbers with metric suffixes, we use

    qemu_strtosz_suffix_unit(nptr, &eptr, QEMU_STRTOSZ_DEFSUFFIX_B, 1000)

Capture this in a new function for legibility:

    qemu_strtosz_metric(nptr, &eptr)

Replace test_qemu_strtosz_suffix_unit() by test_qemu_strtosz_metric().

Rename qemu_strtosz_suffix_unit() to do_strtosz() and give it internal
linkage.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-15-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
0b742797aa test-cutils: Cover qemu_strtosz() around range limits
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-14-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
a6b4373fa2 test-cutils: Cover qemu_strtosz() with trailing crap
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-13-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
18aec47967 test-cutils: Cover qemu_strtosz() invalid input
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-12-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
019144b286 test-cutils: Add missing qemu_strtosz()... endptr checks
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-11-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
3403e5eb88 option: Fix to reject invalid and overflowing numbers
parse_option_number() fails to check for these errors after
strtoull().  Has always been broken.  Fix that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-10-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
4baef2679e util/cutils: Clean up control flow around qemu_strtol() a bit
Reorder check_strtox_error() to make it obvious that we always store
through a non-null @endptr.

Transform

    if (some error) {
        error case ...
        err = value for error case;
    } else {
        normal case ...
        err = value for normal case;
    }
    return err;

to

    if (some error) {
        error case ...
        return value for error case;
    }
    normal case ...
    return value for normal case;

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-9-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
717adf9609 util/cutils: Clean up variable names around qemu_strtol()
Name same things the same, different things differently.

* qemu_strtol()'s parameter @nptr is called @p in
  check_strtox_error().  Rename the latter.

* qemu_strtol()'s parameter @endptr is called @next in
  check_strtox_error().  Rename the latter.

* qemu_strtol()'s variable @p is called @endptr in
  check_strtox_error().  Rename both to @ep.

* qemu_strtol()'s variable @err is *negative* errno,
  check_strtox_error()'s parameter @err is *positive*.  Rename the
  latter to @libc_errno.

Same for qemu_strtoul(), qemu_strtoi64(), qemu_strtou64(), of course.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-8-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
b30d188677 util/cutils: Rename qemu_strtoll(), qemu_strtoull()
The name qemu_strtoll() suggests conversion to long long, but it
actually converts to int64_t.  Rename to qemu_strtoi64().

The name qemu_strtoull() suggests conversion to unsigned long long,
but it actually converts to uint64_t.  Rename to qemu_strtou64().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-7-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
4295f879be util/cutils: Rewrite documentation of qemu_strtol() & friends
Fixes the following documentation bugs:

* Fails to document that null @nptr is safe.

* Fails to document that we return -EINVAL when no conversion could be
  performed (commit 47d4be1).

* Confuses long long with int64_t, and unsigned long long with
  uint64_t.

* Claims the unsigned conversions can underflow.  They can't.

While there, mark problematic assumptions that int64_t is long long,
and uint64_t is unsigned long long with FIXME comments.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-6-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
bc7c08a2c3 test-cutils: Clean up qemu_strtoul() result checks
Use unsigned comparisons to check the result of qemu_strtoul() and
strtoull().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-5-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
73245450b3 test-cutils: Add missing qemu_strtol()... endptr checks
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-4-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
8ee8409eff option: Assert value string isn't null
Plenty of code relies on QemuOpt member @str not being null, including
qemu_opts_print(), qemu_opts_to_qdict(), and callbacks passed to
qemu_opt_foreach().

Begs the question whether it can be null.  Only opt_set() creates
QemuOpt.  It sets member @str to its argument @value.  Passing null
for @value would plant a time bomb.  Callers:

* opts_do_parse() can't pass null.

* qemu_opt_set() passes its argument @value.  Callers:

  - qemu_opts_from_qdict_1() can't pass null

  - qemu_opts_set() passes its argument @value, but none of its
    callers pass null.

  - Many more outside qemu-option.c, but they shouldn't pass null,
    either.

Assert member @str isn't null, so that misuse is caught right away.

Simplify parse_option_bool(), parse_option_number() and
parse_option_size() accordingly.  Best viewed with whitespace changes
ignored.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-3-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
694baf57ae test-qemu-opts: Cover qemu_opts_parse()
The new tests demonstrate a few bugs, all clearly marked.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-2-git-send-email-armbru@redhat.com>
[A few additional test cases squashed in, see
Message-ID: <871supjijq.fsf@dusky.pond.sub.org>]
2017-02-23 20:34:24 +01:00
Peter Maydell
2d896b454a Revert "hw/mips: MIPS Boston board support"
This reverts commit d3473e147a.

This commit creates a board which defaults to having 2GB of RAM.
Unfortunately on 32-bit hosts we can't create boards with 2GB of RAM,
and so 'make check' fails. I missed this during testing of the
merge, unfortunately. Luckily the offending commit is the last
one in the merge request, so we can just revert it for now.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 18:04:45 +00:00
Gerd Hoffmann
4f72b8d2a6 xhci: properties cleanup
Split xhci properties into common and nec specific.

Move the backward compat flags to nec, so the new qemu-xhci
device doesn't carry over the compatibility stuff.

Move the msi/msix switches too and just enable msix for qemu-xhci.

Also move the intrs and slots properties.  Wasn't a great idea to
make them configurable in the first place, nobody needs this.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1487663432-10410-1-git-send-email-kraxel@redhat.com
2017-02-23 16:18:03 +01:00
Li Qiang
6ebc069d67 usb: ohci: fix error return code in servicing td
It should return 1 if an error occurs when reading td.
This will avoid an infinite loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1487760990-115925-1-git-send-email-liqiang6-s@360.cn
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-23 16:18:03 +01:00
Marc-André Lureau
c4fe9700e6 usb: replace handle_destroy with unrealize
Curiously, unrealize() is not being used, but it seems more
appropriate than handle_destroy() together with realize(). It is a more
ubiquitous destroy name in the qemu code base and may throw errors.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170221141451.28305-25-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-23 15:40:19 +01:00
Peter Maydell
10f25e4844 Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170222' into staging
MIPS patches 2017-02-22

Changes:
* Add MIPS Boston board support

# gpg: Signature made Wed 22 Feb 2017 00:08:00 GMT
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170222:
  hw/mips: MIPS Boston board support
  hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
  loader: Support Flattened Image Trees (FIT images)
  dtc: Update requirement to v1.4.2
  target-mips: Provide function to test if a CPU supports an ISA
  hw/mips_gic: Update pin state on mask changes
  hw/mips_gictimer: provide API for retrieving frequency
  hw/mips_cmgcr: allow GCR base to be moved

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 09:59:40 +00:00
XiongZhang
c2b2e158cc vfio/pci-quirks.c: Disable stolen memory for igd VFIO
Regardless of running in UPT or legacy mode, the guest igd
drivers may attempt to use stolen memory; however, only legacy
mode has BIOS support for reserving stolen memory in the
guest VM. We zero out the stolen memory size in all cases,
so the guest igd driver won't use stolen memory.
In legacy mode, the user can use the x-igd-gms option to specify the
amount of stolen memory which will be pre-allocated and reserved
by the BIOS for igd use.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99028
          https://bugs.freedesktop.org/show_bug.cgi?id=99025

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-22 13:19:59 -07:00
Alex Williamson
d0d1cd70d1 vfio/pci: Improve extended capability comments, skip masked caps
Since commit 4bb571d857 ("pci/pcie: don't assume cap id 0 is
reserved") removes the internal use of extended capability ID 0, the
comment here becomes invalid.  However, peeling back the onion, the
code is still correct and we still can't seed the capability chain
with ID 0, unless we want to muck with using the version number to
force the header to be non-zero, which is much uglier to deal with.
The comment also now covers some of the subtleties of using cap ID 0,
such as transparently indicating absence of capabilities if none are
added.  This doesn't detract from the correctness of the referenced
commit as vfio in the kernel also uses capability ID zero to mask
capabilities.  In fact, we should skip zero capabilities precisely
because the kernel might also expose such a capability at the head
position and re-introduce the problem.
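
For illustration only, walking an extended capability chain while skipping
ID-0 entries could look like this (stand-in helpers and constants, not the
actual vfio code):

    #include <stdint.h>
    #include <string.h>

    #define PCIE_CONFIG_SPACE_SIZE 4096
    #define PCIE_EXT_CAP_START     0x100

    static uint32_t cfg_read32(const uint8_t *config, uint16_t pos)
    {
        uint32_t val;
        memcpy(&val, config + pos, sizeof(val));
        return val;                 /* assumes a little-endian host for brevity */
    }

    static void walk_ext_caps(const uint8_t *config)
    {
        uint16_t pos = PCIE_EXT_CAP_START;

        while (pos && pos + 4 <= PCIE_CONFIG_SPACE_SIZE) {
            uint32_t header = cfg_read32(config, pos);
            uint16_t cap_id = header & 0xffff;
            uint16_t next   = header >> 20;     /* next pointer: bits 31:20 */

            if (cap_id != 0) {
                /* copy/expose this capability to the guest ... */
            }                                   /* ... and skip ID 0 entries */
            pos = next;
        }
    }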

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>
Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Tested-by: Jintack Lim <jintack@cs.columbia.edu>
2017-02-22 13:19:58 -07:00
Alex Williamson
35c7cb4caf vfio/pci: Report errors from qdev_unplug() via device request
Currently we ignore this error; report it with error_reportf_err() instead.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-02-22 13:19:58 -07:00
Markus Armbruster
7c81e4e9db block: Don't bother asserting type of output visitor's output
After a visit of a complex QAPI type FOO

    ov = qobject_output_visitor_new(&foo);
    visit_type_FOO(ov, NULL, expr, &error_abort);
    visit_complete(ov, &foo);

we can safely assume qobject_type(foo) is QTYPE_QDICT.  We do in many
places, but occasionally assert qobject_type(obj) == QTYPE_QDICT.
Don't.  The appropriate place to check such fundamental properties of
QAPI visitors is the test suite.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-15-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:20 +01:00
Markus Armbruster
bbf1028a0a monitor: Clean up handle_hmp_command() a bit
Leave checking qobject_type(req) to qmp_check_input_obj().  Rework
handling of json_parser_parse_err() failing without setting an error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-14-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:17 +01:00
Markus Armbruster
dfad9ec4e9 tests: Don't check qobject_type() before qobject_to_qbool()
qobject_to_qbool(obj) returns NULL when obj isn't a QBool.  Check
that instead of qobject_type(obj) == QTYPE_QBOOL.
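
The same pattern applies to the qfloat/qint/qstring/qlist/qdict patches
below. As a generic illustration of the idea (a toy tagged union, not
QEMU's QObject API):

    #include <stdio.h>

    typedef enum { TYPE_BOOL, TYPE_DICT, TYPE_STRING } ObjType;

    typedef struct Obj  { ObjType type; } Obj;
    typedef struct Bool { Obj base; int value; } Bool;

    /* The checked downcast itself returns NULL on a type mismatch ... */
    static Bool *obj_to_bool(Obj *obj)
    {
        return obj && obj->type == TYPE_BOOL ? (Bool *)obj : NULL;
    }

    static void use(Obj *obj)
    {
        /* ... so there is no need to compare the type tag beforehand. */
        Bool *b = obj_to_bool(obj);
        if (!b) {
            fprintf(stderr, "not a bool\n");
            return;
        }
        printf("value: %d\n", b->value);
    }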

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-13-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:14 +01:00
Markus Armbruster
8978b34af3 tests: Don't check qobject_type() before qobject_to_qfloat()
qobject_to_qfloat(obj) returns NULL when obj isn't a QFloat.  Check
that instead of qobject_type(obj) == QTYPE_QFLOAT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-12-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:11 +01:00
Markus Armbruster
0abfc4b885 tests: Don't check qobject_type() before qobject_to_qint()
qobject_to_qint(obj) returns NULL when obj isn't a QInt.  Check
that instead of qobject_type(obj) == QTYPE_QINT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-11-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:09 +01:00
Markus Armbruster
363e13f86e tests: Don't check qobject_type() before qobject_to_qstring()
qobject_to_qstring(obj) returns NULL when obj isn't a QString.  Check
that instead of qobject_type(obj) == QTYPE_QSTRING.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-10-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:06 +01:00
Markus Armbruster
cd17ba51f5 tests: Don't check qobject_type() before qobject_to_qlist()
qobject_to_qlist(obj) returns NULL when obj isn't a QList.  Check
that instead of qobject_type(obj) == QTYPE_QLIST.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-9-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:04 +01:00
Markus Armbruster
ca6b6e1e68 Don't check qobject_type() before qobject_to_qdict()
qobject_to_qdict(obj) returns NULL when obj isn't a QDict.  Check
that instead of qobject_type(obj) == QTYPE_QDICT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-8-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:01 +01:00
Markus Armbruster
4b32e11a59 test-qmp-event: Simplify and tighten event_test_emit()
Use qdict_get_qdict() and qdict_get_try_int() to simplify.

While there, add a sanity check for seconds.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-7-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:59 +01:00
Markus Armbruster
4d96f329cc libqtest: Clean up qmp_response() a bit
Use qobject_to_qdict() instead of a type cast.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-6-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:56 +01:00
Markus Armbruster
9eaaf97168 check-qjson: Simplify around compare_litqobj_to_qobj()
Make compare_litqobj_to_qobj() cope with null, and drop non-null
assertions from callers.

compare_litqobj_to_qobj() already checks the QType matches; drop the
redundant assertions from callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-5-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:54 +01:00
Markus Armbruster
a68931ea5f check-qdict: Tighten qdict_crumple_test_recursive() some
Consistently check for unexpected QDict entries, and qdict_get_qdict()
success.  The latter doesn't tighten the test, it only makes it fail
more nicely.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:51 +01:00
Markus Armbruster
ff9d38963e check-qdict: Simplify qdict_crumple_test_recursive()
Use qdict_get_qdict(), qdict_get_qlist() instead of qdict_get()
followed by qobject_to_qdict(), qobject_to_qlist().

While there, drop some redundant code.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:48 +01:00
Markus Armbruster
b25f23e7db qdict: Make qdict_get_qlist() safe like qdict_get_qdict()
Commit 89cad9f changed qdict_get_qdict() to return NULL instead of
crash when the key doesn't exist or its value isn't a QDict.
Commit 2d6421a neglected to do the same for qdict_get_qlist().
Correct that, and update the function comments.

qdict_get_obj() is now unused, remove.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-2-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:37 +01:00
Markus Armbruster
d3be4b57ce net: Flatten simple union NetLegacyOptions
Simple unions are simpler than flat unions in the schema, but more
complicated in C and on the QMP wire: there's extra indirection in C
and extra nesting on the wire, both pointless.  They're best avoided
in new code.

NetLegacyOptions isn't new, but it's only used internally, not in QMP.
Convert it to a flat union.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487709988-14322-3-git-send-email-armbru@redhat.com>
2017-02-22 19:50:52 +01:00
Markus Armbruster
d081a49af8 numa: Flatten simple union NumaOptions
Simple unions are simpler than flat unions in the schema, but more
complicated in C and on the QMP wire: there's extra indirection in C
and extra nesting on the wire, both pointless.  They're best avoided
in new code.

NumaOptions isn't new, but it's only used internally, not in QMP.
Convert it to a flat union.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487709988-14322-2-git-send-email-armbru@redhat.com>
2017-02-22 19:50:46 +01:00
Peter Maydell
fb6971c110 hw/ppc/ppc405_uc.c: Avoid integer overflows
When performing clock calculations, the ppc405_uc code
has several places where it multiplies together two
32-bit variables and assigns the result to a 64-bit
variable. This doesn't quite do what is intended because
C will compute a 32-bit multiply result. Add casts to
ensure we don't truncate the result.

(Spotted by Coverity, CID 1005504, 1005505.)
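
The issue in a nutshell (variable names are made up; a standalone
illustration, not the board code itself):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t freq = 200000000, mult = 33;

        /* Bug: the multiply is done in 32 bits and truncates before widening. */
        uint64_t wrong = freq * mult;
        /* Fix: cast one operand so the multiplication happens in 64 bits. */
        uint64_t right = (uint64_t)freq * mult;

        printf("wrong=%llu right=%llu\n",
               (unsigned long long)wrong, (unsigned long long)right);
        return 0;
    }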

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Thomas Huth
df58713396 hw/ppc/spapr: Check for valid page size when hot plugging memory
On POWER, the valid page sizes that the guest can use are bound
to the CPU and not to the memory region. QEMU already has some
fancy logic to find out the right maximum memory size to tell
it to the guest during boot (see getrampagesize() in the file
target/ppc/kvm.c for more information).
However, once we're booted and the guest is using huge pages
already, it is currently still possible to hot-plug memory regions
that do not support huge pages - which of course does not work
on POWER, since the guest thinks that it is possible to use huge
pages everywhere. The KVM_RUN ioctl will then abort with -EFAULT,
QEMU spills out a not very helpful error message together with
a register dump and the user is annoyed that the VM unexpectedly
died.
To avoid this situation, we should check the page size of hot-plugged
DIMMs to see whether it is possible to use it in the current VM.
If it does not fit, we can print out a better error message and
refuse to add it, so that the VM does not die unexpectedly and the
user has a second chance to plug a DIMM with a matching memory
backend instead.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1419466
Signed-off-by: Thomas Huth <thuth@redhat.com>
[dwg: Fix a build error on 32-bit builds with KVM]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Alex Zuepke
0a4c774086 target-ppc: fix Book-E TLB matching
The Book-E TLB matching process should bail out early when a TLB
entry matches, but the access permissions are wrong. The CPU
will then raise a DSI error instead of a Data TLB error, as
described for TLB matching in Freescale and IBM documents.

Signed-off-by: Alex Zuepke <azu@sysgo.de>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Sam Bobroff
87684b4c40 hw/net/spapr_llan: 6 byte mac address device tree entry
The spapr-vlan device in QEMU has always presented its MAC address in
the device tree as an 8 byte value, even though PAPR requires it to be
6 bytes.  This is because, at the time, AIX required the value to be 8
bytes.  However, modern versions of AIX support the (correct) 6
byte value so they no longer require the workaround.

It would be neatest to always provide a 6 byte value but that would
cause a problem with old Linux kernel ibmveth drivers, so the old 8
byte value is still presented when necessary.

Since commit 13f85203e (3.10, May 2013) the driver has been able to
handle 6 or 8 byte addresses so versions after that don't need to be
considered specially.

Drivers from kernels before that can also handle either type of
address, but not always:
* If the first byte's lowest bits are 10, the address must be 6 bytes.
* Otherwise, the address must be 8 bytes.
(The two bits in question are significant in a MAC address: they
indicate a locally-administered unicast address.)

So to maintain compatibility the old 8 byte value is presented when
the lowest two bits of the first byte are not 10.
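
In code, the condition is roughly (an illustration, not the actual
spapr_llan change):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * A 6-byte "local-mac-address" entry is only safe for the old drivers
     * when the first octet's lowest two bits are binary 10, i.e. a locally
     * administered unicast address; otherwise keep the old 8-byte entry.
     */
    static bool can_use_6_byte_mac_entry(const uint8_t mac[6])
    {
        return (mac[0] & 0x03) == 0x02;
    }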

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Igor Mammedov
c5514d0e4b machine: replace query_hotpluggable_cpus() callback with has_hotpluggable_cpus flag
Generic helper machine_query_hotpluggable_cpus() replaced the
target specific query_hotpluggable_cpus() callbacks, so
there is no need for them anymore. However, a non-NULL callback
value is used to detect/report hotpluggable cpus support,
therefore the callback can't be removed completely.
Replace it with a MachineClass.has_hotpluggable_cpus boolean,
which is sufficient for the task.

Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
f2d672c248 machine: unify [pc_|spapr_]query_hotpluggable_cpus() callbacks
All FOO_query_hotpluggable_cpus() callbacks are practically
the same except for setting vcpus_count to different values.
Convert them to a generic machine_query_hotpluggable_cpus()
callback by moving vcpus_count initialization to per machine
specific callback possible_cpu_arch_ids().

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
535455fdee spapr: reuse machine->possible_cpus instead of cores[]
Replace SPAPR specific cores[] array with generic
machine->possible_cpus and store core objects there.
It makes core bookkeeping similar to x86 cpus and
will allow unifying similar code.
It would allow replacing cpu_index based NUMA node
mapping with a property based one (for -device created
cores) since possible_cpus carries the board defined
topology/layout.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
8aba384298 change CPUArchId.cpu type to Object*
so it could be reused for SPAPR cores as well

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
1ea69c0e25 pc: pass apic_id to pc_find_cpu_slot() directly so lookup could be done without CPU object
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
c67ae9333c pc: calculate topology only once when possible_cpus is initialised
Fill in CpuInstanceProperties once at board init time and
just copy them whenever query_hotpluggable_cpus() is called.
It will keep topology info always available without the need
to recalculate it every time it is needed.
Considering it has NUMA node id, it will be used to keep
NUMA node to cpu mapping instead of numa_info[i].node_cpu
bitmasks.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
c96a1c0ba6 pc: move pcms->possible_cpus init out of pc_cpus_init()
possible_cpus could be initialized earlier than cpu objects,
i.e. when -smp is parsed, so move the init code to possible_cpu_arch_ids()
interface func and do initialization on the first call.

It should help later with making -numa cpu/-smp parsing machine state
properties.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
38690a1ca7 machine: move possible_cpus to MachineState
so that it would be possible to reuse it with
spapr/virt-aarch64 targets.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Thomas Huth
fb38ebfbfe hw/pci-host/prep: Do not use hw_error() in realize function
hw_error() is for CPU related errors only (it prints out a
register dump and calls abort()), so we should not use it
if we just failed to load the bios image. Apart from that,
realize() functions should not exit directly but always set
the errp with error_setg() in case of errors instead.
Additionally, move some code around and delete the bios memory
subregion again in case of such an error, so that we leave a
clean state when returning to the caller.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
5065908361 target/ppc/POWER9: Direct all instr and data storage interrupts to the hypv
The vpm0 bit was removed from the LPCR in POWER9; this bit controlled
whether ISI and DSI interrupts were directed to the hypervisor or the
partition. These interrupts now go to the hypervisor irrespective of it, thus
it is no longer necessary to check the vpm0 bit in the LPCR.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
18aa49ecf4 target/ppc/POWER9: Adapt LPCR handling for POWER9
The logical partitioning control register controls a thread's operation
based on the partition it is currently executing in. Add new definitions and
update the mask used when writing to the LPCR based on the POWER9 spec.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
86cf1e9fe8 target/ppc/POWER9: Add ISAv3.00 MMU definition
POWER9 processors implement the mmu as defined in version 3.00 of the ISA.

Add a definition for this mmu model and set the POWER9 cpu model to use
this mmu model.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
7659ca1a3e target/ppc: Fix LPCR DPFD mask define
The DPFD field in the LPCR is 3 bits wide. This has always been defined
as 0x3 << shift which indicates a 2 bit field, which is incorrect.
Correct this.
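
In other words (placeholder shift value, not the real LPCR layout):

    /* A 3-bit field needs a 3-bit mask: 0x7, not 0x3. */
    #define DPFD_SHIFT       52                         /* placeholder position */
    #define DPFD_MASK_WRONG  (0x3ULL << DPFD_SHIFT)     /* covers only 2 bits   */
    #define DPFD_MASK_FIXED  (0x7ULL << DPFD_SHIFT)     /* covers all 3 bits    */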

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
e0aee726bf target-ppc: Add xscvqpudz and xscvqpuwz instructions
xscvqpudz: VSX Scalar truncate & Convert Quad-Precision format to
           Unsigned Doubleword format
xscvqpuwz: VSX Scalar truncate & Convert Quad-Precision format to
           Unsigned Word format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
a8d411abac target-ppc: Implement round to odd variants of quad FP instructions
xsaddqpo:  VSX Scalar Add Quad-Precision using round to Odd
xsmulqpo:  VSX Scalar Multiply Quad-Precision using round to Odd
xsdivqpo:  VSX Scalar Divide Quad-Precision using round to Odd
xscvqpdpo: VSX Scalar round & Convert Quad-Precision format to
           Double-Precision format using round to Odd
xssqrtqpo: VSX Scalar Square Root Quad-Precision using round to Odd
xssubqpo:  VSX Scalar Subtract Quad-Precision using round to Odd

In addition, fix the invalid bitmask in the instruction encoding
of xssqrtqp[o].

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
fd425037d2 softfloat: Add float128_to_uint32_round_to_zero()
float128_to_uint32_round_to_zero() is needed by xscvqpuwz instruction
of PowerPC ISA 3.0.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
2e6d856835 softfloat: Add float128_to_uint64_round_to_zero()
Implement float128_to_uint64() and use that to implement
float128_to_uint64_round_to_zero()

This is required by xscvqpudz instruction of PowerPC ISA 3.0.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
9ee6f678f4 softfloat: Add round-to-odd rounding mode
Power ISA 3.0 introduces a few quadruple precision floating point
instructions that support round-to-odd rounding mode. The
round-to-odd mode is explained as follows:

Let Z be the intermediate arithmetic result or the operand of a convert
operation. If Z can be represented exactly in the target format, the
result is Z. Otherwise the result is either Z1 or Z2 whichever is odd.
Here Z1 and Z2 are the next larger and smaller numbers representable
in the target format respectively.
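
For positive fraction magnitudes this boils down to "jamming" the sticky
bit into the result's low bit, roughly as below (an illustration of the
rounding rule only, not the softfloat implementation):

    #include <stdint.h>

    /*
     * Drop the low 'discard_bits' (0 < discard_bits < 64) of 'frac' using
     * round-to-odd: if anything non-zero is discarded, force the low bit of
     * the kept part to 1, which is always the odd one of the two neighbours.
     */
    static uint64_t round_to_odd(uint64_t frac, unsigned int discard_bits)
    {
        uint64_t kept = frac >> discard_bits;
        uint64_t lost = frac & (((uint64_t)1 << discard_bits) - 1);

        return kept | (lost != 0);
    }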

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
5b929608b9 spapr: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
f4af7d4438 ppc4xx: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
5283c27fc5 mac99: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Sam Bobroff
2635531f20 target-ppc, tcg: fix usermode segfault with pthread_create()
Programs run under qemu-ppc64 on an x86_64 host currently segfault
if they use pthread_create() due to the adjustment made to the NIP in
commit bd6fefe71c.

This patch changes cpu_loop() to set the NIP back to the
pre-incremented value before calling do_syscall(), which causes the
correct address to be used for the new thread and corrects the fault.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
c09cec683b target-ppc: add wait instruction
Use the available wait instruction implementation.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
62d897ca8b target-ppc: add slbsync implementation
slbsync: SLB Synchronize

The instruction provides an ordering function for the effects of all
slbieg instructions executed by the thread executing the slbsync
instruction.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
a63f1dfc62 target-ppc: add slbieg instruction
slbieg: SLB Invalidate Entry Global

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
80b8c1ee05 target-ppc: generate exception for copy/paste
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Balamuruhan S
a34011881c target-ppc: implement store atomic instruction
stwat: Store Word Atomic
stdat: Store Doubleword Atomic

The instruction includes a function code (5 bits) which gives detail
on the operation to be performed. The patch implements five such
functions.

Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Signed-off-by: Harish S <harisrir@linux.vnet.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[ implement stdat, use macro and combine both implementation ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Balamuruhan S
a68a614673 target-ppc: implement load atomic instruction
lwat: Load Word Atomic
ldat: Load Doubleword Atomic

The instruction includes a function code (5 bits) which gives detail
on the operation to be performed. The patch implements five such
functions.

Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Signed-off-by: Harish S <harisrir@linux.vnet.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[ combine both lwat/ldat implementation using macro ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Sam Bobroff
fe93e3e6ec spapr: fix off-by-one error in spapr_ovec_populate_dt()
The last byte of the option vector was missing due to an off-by-one
error. Without this fix, client architecture support negotiation will
fail because the last byte of option vector 5, which contains the MMU
support, will be missed.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Bharata B Rao
d4ccd87e68 target-ppc: Add xsmaxjdp and xsminjdp instructions
xsmaxjdp: VSX Scalar Maximum Type-J Double-Precision
xsminjdp: VSX Scalar Minimum Type-J Double-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Bharata B Rao
2770deede0 target-ppc: Add xsmaxcdp and xsmincdp instructions
xsmaxcdp: VSX Scalar Maximum Type-C Double-Precision
xsmincdp: VSX Scalar Minimum Type-C Double-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Thomas Huth
802fc7abd0 hw/ppc/pnv: Remove superfluous "qemu" prefix from error strings
error_report() already puts a prefix with the program name in front
of the error strings, so the "qemu:" prefix is not necessary here
anymore.

Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
f6b99afdc3 ppc: implement xssubqp instruction
xssubqp: VSX Scalar Subtract Quad-Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
a4a68476de ppc: implement xssqrtqp instruction
xssqrtqp: VSX Scalar Square Root Quad-Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
917950d7f5 ppc: implement xsrqpxp instruction
xsrqpxp: VSX Scalar Round Quad-Precision to Double-Extended Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
be07ad5842 ppc: implement xsrqpi[x] instruction
xsrqpi[x]: VSX Scalar Round to Quad-Precision Integer
[with Inexact].

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
115debf26c spapr: make cpu core unplug follow expected hotunplug call flow
spapr_core_unplug() was essentially a spapr_core_unplug_request()
handler: it requested CPU removal and registered the callback
which did the actual cpu core removal, but it was called from
spapr_machine_device_unplug(), which is intended for actual object
removal. Commit cf632463 ("spapr: Memory hot-unplug support")
sort of fixed it by introducing spapr_machine_device_unplug_request()
and calling spapr_core_unplug() from there, but it didn't rename the
callback and by mistake still calls it from spapr_machine_device_unplug().

However, spapr_machine_device_unplug() is never called for a
cpu core, since spapr_core_release() doesn't follow the expected
hotunplug call flow, which is:
 1: device_del() ->
        hotplug_handler_unplug_request() ->
            set destroy_cb()
 2: destroy_cb() ->
        hotplug_handler_unplug() ->
            object_unparent // actual device removal

Fix it by renaming spapr_core_unplug() to spapr_core_unplug_request(),
which is called from spapr_machine_device_unplug_request(), and by
making spapr_core_release() call hotplug_handler_unplug(), which
will call spapr_machine_device_unplug() -> spapr_core_unplug()
to remove the cpu core.
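
As a rough sketch of the result (helper names are from QEMU's generic
hotplug API; the body is an assumption, not the verbatim patch), the
release callback now simply funnels back into the generic unplug path:

    /* sketch only: drc release callback after the rename */
    static void spapr_core_release(DeviceState *dev, void *opaque)
    {
        HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);

        /* goes through spapr_machine_device_unplug() ->
         * spapr_core_unplug() to actually remove the core */
        hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
    }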

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
ff9006ddbf spapr: move spapr_core_[foo]plug() callbacks close to machine code in spapr.c
spapr_core_pre_plug/spapr_core_plug/spapr_core_unplug() manage
wiring a CPU core into the spapr machine state, not internal CPU core
state. So move them from spapr_cpu_core.c to spapr.c, where other
similar (spapr_memory_[foo]plug()) callbacks are located, which also
matches x86 target practice.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
f844616bf6 spapr: cpu core: separate child threads destruction from machine state operations
Split off destroying VCPU threads from the drc callback
spapr_core_release() into a new spapr_cpu_core_unrealizefn(),
which takes care of internal cpu core state cleanup (i.e.
VCPU threads) and is called when object_unparent(core)
is called.

That leaves spapr_core_release() only with board management
code, which will be moved to a board-related file in a
follow-up patch along with the rest of the hotplug callbacks.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Paul Burton
d3473e147a hw/mips: MIPS Boston board support
Introduce support for emulating the MIPS Boston development board. The
Boston board is built around an FPGA & 3 PCIe controllers, one of which
is connected to an Intel EG20T Platform Controller Hub. It is used
during the development & debug of new CPUs and the software intended to
run on them, and is essentially the successor to the older MIPS Malta
board.

This patch does not implement the EG20T, instead connecting an already
supported ICH-9 AHCI controller. Whilst this isn't accurate, it's enough
for typical stock Boston software (e.g. Linux kernels) to work with hard
disks, given that both the ICH-9 & EG20T implement the AHCI
specification.

Boston boards typically boot kernels in the FIT image format, and this
patch will treat kernels provided to QEMU as such. When loading a kernel
directly, the board code will generate minimal firmware much as the
Malta board code does. This firmware will set up the CM, CPC & GIC
register base addresses then set argument registers & jump to the kernel
entry point. Alternatively, bootloader code may be loaded using the bios
argument, in which case no firmware will be generated & execution will
proceed from the start of the boot code at the default MIPS boot
exception vector (offset 0x1fc00000 into (c)kseg1).

Currently real Boston boards are always used with FPGA bitfiles that
include a Global Interrupt Controller (GIC), so the interrupt
configuration is only defined for such cases. Therefore the board will
only allow use of CPUs which implement the CPS components, including the
GIC, and will otherwise exit with a message.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
[yongbok.kim@imgtec.com:
  isolated boston machine support for mips64el.
  updated for recent Chardev changes.
  ignore missing bios/kernel for qtest.
  added default -drive to if=ide explicitly]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:30 +00:00
Paul Burton
62be393423 hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
Add support for emulating the Xilinx AXI Root Port Bridge for PCI
Express as described by Xilinx' PG055 document. This is a PCIe
controller that can be used with certain series of Xilinx FPGAs, and is
used on the MIPS Boston board which will make use of this code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  removed returning on !level,
  updated IRQ connection with GPIO logic,
  moved xilinx_pcie_init() to boston.c
  replaced stw_le_p() with pci_set_word()
  and other cosmetic changes]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:29 +00:00
Paul Burton
51b58561c1 loader: Support Flattened Image Trees (FIT images)
Introduce support for loading Flattened Image Trees, as used by modern
U-Boot. FIT images are essentially flattened device tree files which
contain binary images such as kernels, FDTs or ramdisks along with one
or more configuration nodes describing boot configurations.

The MIPS Boston board typically boots kernels in the form of FIT images,
and will make use of this code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  fixed potential memory leaks,
  isolated building option]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:47:40 +00:00
Paul Burton
6e85fce022 dtc: Update requirement to v1.4.2
In order to obtain fdt_first_subnode & fdt_next_subnode symbols from
libfdt for use by a later patch, bump the requirement for dtc to v1.4.2
& the submodule to that same version.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
bed9e5ceb1 target-mips: Provide function to test if a CPU supports an ISA
Provide a new cpu_supports_isa function which allows callers to
determine whether a CPU supports one of the ISA_ flags, by testing
whether the associated struct mips_def_t sets the ISA flags in its
insn_flags field.

An example use of this is to allow boards which generate bootloader code
to determine the properties of the CPU that will be used, for example
whether the CPU is 64 bit or which architecture revision it implements.
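
For illustration, a board's bootloader generator could branch on the CPU's
capabilities roughly as below; the exact prototype of cpu_supports_isa and
the generate_boot_code_* helpers are assumptions, not code from this patch:

    /* sketch: pick 32- or 64-bit boot code depending on the CPU */
    if (cpu_supports_isa(machine->cpu_model, ISA_MIPS64)) {
        generate_boot_code_64(code_buf);   /* hypothetical helper */
    } else {
        generate_boot_code_32(code_buf);   /* hypothetical helper */
    }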

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
2e2a1b4648 hw/mips_gic: Update pin state on mask changes
If the GIC interrupt mask is changed by a write to the smask (set mask)
or rmask (reset mask) registers, we need to re-evaluate the state of the
pins/IRQs fed to the CPU. Without doing so we risk leaving a pin high
despite the interrupt that led to that state being masked, or losing
interrupts if an already pending interrupt is unmasked.
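
The re-evaluation boils down to recomputing each routed line from its
pending and mask bits after every smask/rmask write.  A hedged sketch
(the state and field names are assumptions, not the patch itself):

    /* sketch only: recompute one routed interrupt after a mask change */
    static void gic_update_pin(MIPSGICState *gic, int n)
    {
        bool level = gic->irq_state[n].pending && gic->irq_state[n].enabled;

        /* lowers a line whose interrupt was just masked, raises a
         * pending interrupt that was just unmasked */
        qemu_set_irq(gic->irq_state[n].irq, level);
    }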

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
eb90ab9437 hw/mips_gictimer: provide API for retrieving frequency
Provide a new function mips_gictimer_get_freq() which returns the
frequency at which a GIC timer will count. This will be useful for
boards which perform setup based upon this frequency.
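
A minimal usage sketch (the argument is assumed to be the GIC timer state
pointer; not taken verbatim from the patch):

    /* sketch: board code reads the frequency back while generating
     * firmware or device tree properties */
    uint32_t timer_freq = mips_gictimer_get_freq(gictimer);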

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
08944be1d9 hw/mips_cmgcr: allow GCR base to be moved
Support moving the GCR base address & updating the CPU's CP0 CMGCRBase
register appropriately. This is required if a platform needs to move its
GCRs away from other memory, as the MIPS Boston development board does
to avoid its flash memory.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Peter Maydell
e295a154c2 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-hmp-20170221' into staging
HMP pull

Note: I had seen a failure in the vhost-user/flags-mismatch test on one
host in one build, but not on others with the same patches; these patches
go nowhere near that, so I think that's a separate vhost-user issue.

# gpg: Signature made Tue 21 Feb 2017 18:49:25 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-hmp-20170221:
  monitor: Fix crashes when using HMP commands without CPU
  monitor: add poll-* properties into query-iothreads result
  hmp: fix block_set_io_throttle

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 19:41:57 +00:00
Thomas Huth
854e67fea6 monitor: Fix crashes when using HMP commands without CPU
When running certain HMP commands ("info registers", "info cpustats",
"info tlb", "nmi", "memsave" or dumping virtual memory) with the "none"
machine, QEMU crashes with a segmentation fault. This happens because the
"none" machine does not have any CPUs by default, but these HMP commands
did not check for a valid CPU pointer yet. Add such checks now, so we get
an error message about the missing CPU instead.
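
The added checks are of the following shape (a sketch; the helper names
match the existing monitor code, the exact error wording is an assumption):

    /* sketch: bail out early instead of dereferencing a NULL CPU */
    CPUState *cs = mon_get_cpu();
    if (!cs) {
        monitor_printf(mon, "No CPU available\n");
        return;
    }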

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1484309555-1935-1-git-send-email-thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Pavel Hrdina
5fc00480ab monitor: add poll-* properties into query-iothreads result
IOThreads were recently extended with new properties that can
enable/disable and configure aio polling.  This will also allow
other tools that use QEMU to probe for the existence of those new
properties via query-qmp-schema.

Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Message-Id: <3163c16d6ab4257f7be9ad44fe9cc0ce8c359e5a.1486718555.git.phrdina@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Eric Blake
3f35c3b166 hmp: fix block_set_io_throttle
Commit 7a9877a made the 'device' parameter to BlockIOThrottle
optional, favoring 'id' instead.  But it forgot to update the
HMP usage to set has_device, which makes all attempts to change
throttling via HMP fail with "Need exactly one of 'device' and 'id'".
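
The missing piece is a one-liner of this shape (a sketch; field names as
in the QAPI-generated BlockIOThrottle struct, not the verbatim patch):

    BlockIOThrottle throttle = { 0 };

    throttle.has_device = true;                 /* the forgotten flag */
    throttle.device = (char *) qdict_get_str(qdict, "device");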

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170120230359.4244-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:00 +00:00
Peter Maydell
796b288f7b Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 21 Feb 2017 15:40:05 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  qemu-options: Fix broken sheepdog URL
  mirror: do not increase offset during initial zero_or_discard phase
  QAPI: Fix blockdev-add example documentation
  iscsi: Add blockdev-add support
  iscsi: Add timeout option
  iscsi: Add header-digest option
  iscsi: Add initiator-name option
  iscsi: Handle -iscsi user/password in bdrv_parse_filename()
  iscsi: Split URL into individual options

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 15:48:22 +00:00
Thomas Huth
6135c5e126 qemu-options: Fix broken sheepdog URL
The sheepdog URL is broken twice: first, it uses a duplicated
http:// prefix; second, the website seems to have moved to
https://sheepdog.github.io/sheepdog/ instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:09 -05:00
Anton Nefedov
90ab48eb07 mirror: do not increase offset during initial zero_or_discard phase
If explicit zeroing out before mirroring is required for the target image,
it moves the block job offset counter to EOF, so the offset and len counters
count the image size twice. There is no harm, but the stats are confusing;
specifically, the progress of the operation is always reported as 99% by
management tools.

The patch skips the offset increase for the first "technical" pass over the
image. This should not cause any further harm.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1486045515-8009-1-git-send-email-den@openvz.org
CC: Jeff Cody <jcody@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:00 -05:00
Jeff Cody
b166099712 QAPI: Fix blockdev-add example documentation
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:46 -05:00
Kevin Wolf
31eb1202d3 iscsi: Add blockdev-add support
This adds blockdev-add support for iscsi devices.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:34 -05:00
Kevin Wolf
1d56010482 iscsi: Add timeout option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if a URL is given. New users are
supposed to use the new driver-specific option.

All -iscsi options have a corresponding driver-specific option for the
iscsi block driver now.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:26 -05:00
Kevin Wolf
81aa2a0fb5 iscsi: Add header-digest option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if a URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:16 -05:00
Kevin Wolf
d4e799292c iscsi: Add initiator-name option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if a URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:08 -05:00
Kevin Wolf
4317142020 iscsi: Handle -iscsi user/password in bdrv_parse_filename()
This splits the logic in the old parse_chap() function into a part that
parses the -iscsi options into the new driver-specific options, and
another part that actually applies those options (called apply_chap()
now).

Note that this means that username and password specified with -iscsi
only take effect when a URL is provided. This is intentional, -iscsi is
a legacy interface only supported for compatibility, new users should
use the proper driver-specific options.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:57 -05:00
Kevin Wolf
d5895fcb1d iscsi: Split URL into individual options
This introduces a .bdrv_parse_filename handler for iscsi which parses a
URL if given and translates it to individual options.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:34 -05:00
Peter Maydell
a1cf5fac2b Merge remote-tracking branch 'remotes/armbru/tags/pull-block-2017-02-21' into staging
Changes to -drive without if= and with if=scsi

# gpg: Signature made Tue 21 Feb 2017 12:22:35 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-block-2017-02-21:
  hw/i386: Deprecate -drive if=scsi with PC machine types
  hw: Deprecate -drive if=scsi with non-onboard HBAs
  hw/scsi: Concentrate -drive if=scsi auto-create in one place
  hw: Drop superfluous special checks for orphaned -drive
  blockdev: Make orphaned -drive fatal
  blockdev: Improve message for orphaned -drive
  hw/arm/highbank: Default -drive to if=ide instead of if=scsi
  hw: Default -drive to if=none instead of scsi when scsi cannot work
  hw: Default -drive to if=none instead of ide when ide cannot work
  hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
  hw: Default -drive to if=ide explicitly where it works

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 13:58:50 +00:00
Markus Armbruster
f778a82f0c hw/i386: Deprecate -drive if=scsi with PC machine types
The PC machines (pc-q35-* pc-i440fx-* pc-* isapc xenfv) automatically
create lsi53c895a SCSI HBAs and SCSI devices to honor -drive if=scsi.
For giggles, try -drive if=scsi,bus=25,media=cdrom --- this makes QEMU
create 25 of them.

lsi53c895a is thoroughly obsolete (PCI Ultra2 SCSI, ca. 2000), and
currently has no maintainer in QEMU.  megasas is a better choice,
except with old OSes that lack drivers.  virtio-scsi is a much better
choice when you have a driver, but only (newish) Linux comes with one
in the box.  There is no good default that works for all guests.

Encourage users to pick a non-obsolete SCSI HBA that works for them by
deprecating -drive if=scsi.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-4-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
a64aa5785d hw: Deprecate -drive if=scsi with non-onboard HBAs
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

Drives defined with if=scsi are also picked up by SCSI HBAs added with
-device, unlike other interface types.  Deprecate this usage, as follows.

Create the frontends for onboard HBAs in machine initialization code,
exactly like we do for if=ide and other interface types.  Change
scsi_legacy_handle_cmdline() to create a frontend only when it's still
missing, and warn that this usage is deprecated.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-3-git-send-email-armbru@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
fb8b660e17 hw/scsi: Concentrate -drive if=scsi auto-create in one place
The logic to create frontends for -drive if=scsi is in SCSI HBAs.  For
all other interface types, it's in machine initialization code.

A few machine types create the SCSI HBAs necessary for that.  That's
also not done for other interface types.

I'm going to deprecate these SCSI eccentricities.  In preparation for
that, create the frontends in main() instead of the SCSI HBAs, by
calling new function scsi_legacy_handle_cmdline() there.

Note that not all SCSI HBAs create frontends.  Take care not to change
that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-2-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
8f2d75e81d hw: Drop superfluous special checks for orphaned -drive
We've traditionally rejected orphans here and there, but not
systematically.  For instance, the sun4m machines have an onboard SCSI
HBA (bus=0), and have always rejected bus>0.  Other machines with an
onboard SCSI HBA don't.

Commit a66c9dc made all orphans trigger a warning, and the previous
commit turned this into an error.  The checks "here and there" are now
redundant.  Drop them.

Note that the one in mips_jazz.c was wrong: it rejected bus > MAX_FD,
but MAX_FD is the number of floppy drives per bus.

Error messages change from

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: Too many IDE buses defined (3 > 2)
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu: too many floppy drives
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu: too many SCSI bus

to

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: -drive if=ide,bus=2: machine type does not support if=ide,bus=2,unit=0
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu-system-mips64: -drive if=floppy,bus=2,id=fd1: machine type does not support if=floppy,bus=2,unit=0
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu-system-sparc: -drive if=scsi,bus=1: machine type does not support if=scsi,bus=1,unit=0

Cc: John Snow <jsnow@redhat.com>
Cc: "Hervé Poussineau" <hpoussin@reactos.org>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-9-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
720b8dc052 blockdev: Make orphaned -drive fatal
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

If machine initialization code doesn't comply, the block backend
remains unused.  This triggers a warning since commit a66c9dc, v2.2.0.
Drives created by default are exempted; use -nodefaults to get rid of
them.

Turn this warning into an error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-8-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
664cc623bf blockdev: Improve message for orphaned -drive
We warn when a -drive isn't supported by the machine type (commit
a66c9dc):

    $ qemu-system-x86_64 -S -display none -drive if=mtd
    Warning: Orphaned drive without device: id=mtd0,file=,if=mtd,bus=0,unit=0

Improve this to point to the offending bit of configuration:

    qemu-system-x86_64: -drive if=mtd: warning: machine type does not support if=mtd,bus=0,unit=0

Especially nice when it's hidden behind -readconfig foo.cfg:

    qemu-system-x86_64:foo.cfg:140: warning: machine type does not support if=mtd,bus=0,unit=0

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-7-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:40 +01:00
Markus Armbruster
2a7ae4ee50 hw/arm/highbank: Default -drive to if=ide instead of if=scsi
These machines have no onboard SCSI HBA, and no way to plug one.
-drive if=scsi therefore cannot work.  They do have an onboard IDE
controller (sysbus-ahci), but fail to honor if=ide.

Change their default to if=ide, and add a TODO comment on what needs
to be done to actually honor -drive if=ide.

Cc: Rob Herring <robh@kernel.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-6-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
7e465513c1 hw: Default -drive to if=none instead of scsi when scsi cannot work
Block backends defined with -drive if=scsi are meant to be picked up
by machine initialization code: a suitable frontend gets created and
wired up automatically.

if=scsi drives not picked up that way can still be used with -device
as if they had if=none, but that's unclean and best avoided.  Unused
ones produce an "Orphaned drive without device" warning.

A few machine types default to if=scsi, even though they don't
actually have a SCSI HBA.  This makes no sense.  Change their default
to if=none.  Affected machines:

* aarch64/arm: realview-pbx-a9 vexpress-a9 vexpress-a15 xilinx-zynq-a9

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-5-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
a27fa28f03 hw: Default -drive to if=none instead of ide when ide cannot work
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types implicitly default to if=ide that way, even though
they don't actually have an IDE controller.  This makes no sense.

Change the implicit default to if=none.  Affected machines:

* all targets: none
* aarch64/arm: akita ast2500 canon cheetah collie connex imx25
  integratorcp kzm lm3s6965evb lm3s811evb mainstone musicpal n800 n810
  netduino2 nuri palmetto realview romulus sabrelite smdkc210 sx1 sx1
  verdex z2
* cris: axis-dev88
* i386/x86_64: xenpv
* lm32: lm32-evr lm32-uclinux milkymist
* m68k: an5206 dummy mcf5208evb
* microblaze/microblazeel: petalogix-ml605 petalogix-s3adsp1800
* mips/mips64/mips64el/mipsel: mipssim
* moxie: moxiesim
* or32: or32-sim
* ppc/ppc64/ppcemb: bamboo ref405ep taihu virtex-ml507
* ppc/ppc64: mpc8544ds ppce500
* sh4/sh4eb: shix
* sparc: leon3_generic
* sparc64: niagara
* tricore: tricore_testboard
* unicore32: puv3
* xtensa/xtensaeb: kc705 lx200 lx60 ml605 sim

None of these machines have an IDE controller, let alone code to
honor if=ide.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Cc: Edgar E. Iglesias <edgar.iglesias@gmail.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: xen-devel@lists.xensource.com
Cc: Michael Walle <michael@walle.cc>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Anthony Green <green@moxielogic.com>
Cc: Jia Liu <proljc@gmail.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: qemu-ppc@nongnu.org
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Fabien Chouteau <chouteau@adacore.com>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Cc: Artyom Tarasenko <atar4qemu@gmail.com>
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-By: Artyom Tarasenko <atar4qemu@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-4-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
e0319b0302 hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
Machine types cubieboard, xlnx-ep108, xlnx-zcu102 have an onboard AHCI
controller, but neglect to set their MachineClass member
units_per_default_bus = 1.  This permits -drive if=ide,unit=1, which
makes no sense for AHCI.  It also screws up index=N for odd N, because
it gets desugared to unit=1,bus=N/2 (e.g. index=3 becomes bus=1,unit=1).

Doesn't really matter, because these machine types fail to honor
-drive if=ide.  Add the missing units_per_default_bus = 1 anyway,
along with a TODO comment on what needs to be done for -drive if=ide.

Also set block_default_type = IF_IDE explicitly.  It's currently the
default, but the next commit will change it to something more
sensible, and we want to keep the IF_IDE default for these three
machines.  See also the previous commit.

Cc: Beniamino Galvani <b.galvani@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-3-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
2059839baa hw: Default -drive to if=ide explicitly where it works
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types default to if=ide, even though they don't actually
have an IDE controller.  A future patch will change these defaults to
something more sensible.  To prepare for it, this patch makes default
"ide" explicit for the machines that actually pick up if=ide drives:

* alpha: clipper
* arm/aarch64: spitz borzoi terrier tosa
* i386/x86_64: generic-pc-machine (with concrete subtypes pc-q35-*
  pc-i440fx-* pc-* isapc xenfv)
* mips64el: fulong2e
* mips/mipsel/mips64el: malta mips
* ppc/ppc64: mac99 g3beige prep
* sh4/sh4eb: r2d
* sparc64: sun4u sun4v

Note that the ppc64 machine powernv already sets an "ide" default
explicitly.  Its IDE controller isn't implemented yet.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-2-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Peter Maydell
a0775e28cd Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request

v2:
 * Rebased to resolve scsi conflicts

# gpg: Signature made Tue 21 Feb 2017 11:56:24 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request: (24 commits)
  coroutine-lock: make CoRwlock thread-safe and fair
  coroutine-lock: add mutex argument to CoQueue APIs
  coroutine-lock: place CoMutex before CoQueue in header
  test-aio-multithread: add performance comparison with thread-based mutexes
  coroutine-lock: add limited spinning to CoMutex
  coroutine-lock: make CoMutex thread-safe
  block: document fields protected by AioContext lock
  async: remove unnecessary inc/dec pairs
  aio-posix: partially inline aio_dispatch into aio_poll
  block: explicitly acquire aiocontext in aio callbacks that need it
  block: explicitly acquire aiocontext in bottom halves that need it
  block: explicitly acquire aiocontext in callbacks that need it
  block: explicitly acquire aiocontext in timers that need it
  aio: push aio_context_acquire/release down to dispatching
  qed: introduce qed_aio_start_io and qed_aio_next_io_cb
  blkdebug: reschedule coroutine on the AioContext it is running on
  coroutine-lock: reschedule coroutine on the AioContext it was running on
  nbd: convert to use qio_channel_yield
  io: make qio_channel_yield aware of AioContexts
  io: add methods to set I/O handlers on AioContext
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 11:58:03 +00:00
Paolo Bonzini
a7b91d35ba coroutine-lock: make CoRwlock thread-safe and fair
This adds a CoMutex around the existing CoQueue.  Because the write-side
can just take CoMutex, the old "writer" field is not necessary anymore.
Instead of removing it altogether, count the number of pending writers
during a read-side critical section and forbid further readers from
entering.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
1ace7ceac5 coroutine-lock: add mutex argument to CoQueue APIs
All that CoQueue needs in order to become thread-safe is help
from an external mutex.  Add this to the API.
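
A hedged usage sketch of the new shape (argument order assumed;
condition_holds is a placeholder): the queue takes the mutex that protects
the shared state, so the wait can drop it and re-take it on wakeup.

    qemu_co_mutex_lock(&lock);
    while (!condition_holds) {
        qemu_co_queue_wait(&queue, &lock);   /* releases and re-takes lock */
    }
    /* ... operate on the protected state ... */
    qemu_co_mutex_unlock(&lock);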

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
f8c6e1cbc3 coroutine-lock: place CoMutex before CoQueue in header
This will avoid forward references in the next patch.  It is also
more logical because CoQueue is not anymore the basic primitive.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
c05df34a87 test-aio-multithread: add performance comparison with thread-based mutexes
Add two implementations of the same benchmark as the previous patch,
but using pthreads.  One uses a normal QemuMutex, the other is Linux
only and implements a fair mutex based on MCS locks and futexes.
This shows that the slower performance of the 5-thread case is due to
the fairness of CoMutex, rather than to coroutines.  If fairness does
not matter, as is the case with two threads, CoMutex can actually be
faster than pthreads.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
480cff6322 coroutine-lock: add limited spinning to CoMutex
Running a very small critical section on pthread_mutex_t and CoMutex
shows that pthread_mutex_t is much faster because it doesn't actually
go to sleep.  What happens is that the critical section is shorter
than the latency of entering the kernel and thus FUTEX_WAIT always
fails.  With CoMutex there is no such latency but you still want to
avoid wait and wakeup.  So introduce it artificially.

This only works with one waiter; because CoMutex is fair, it will
always have more waits and wakeups than a pthread_mutex_t.
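
The idea, reduced to a self-contained C sketch (this is not the CoMutex
code, just the generic spin-before-sleep pattern):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* spin a bounded number of times; if that fails, the caller pays the
     * full wait/wakeup cost on its normal slow path */
    static bool try_lock_spinning(atomic_flag *lock, int spin_limit)
    {
        for (int i = 0; i < spin_limit; i++) {
            if (!atomic_flag_test_and_set(lock)) {
                return true;        /* acquired without sleeping */
            }
        }
        return false;               /* fall back to wait/wakeup */
    }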

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
fed20a70e3 coroutine-lock: make CoMutex thread-safe
This uses the lock-free mutex described in the paper '"Blocking without
Locking", or LFTHREADS: A lock-free thread library' by Gidenstam and
Papatriantafilou.  The same technique is used in OSv, and in fact
the code is essentially a conversion to C of OSv's code.

[Added missing coroutine_fn in tests/test-aio-multithread.c.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
91bcea4899 block: document fields protected by AioContext lock
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-19-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
bd451435c0 async: remove unnecessary inc/dec pairs
Pull the increment/decrement pair out of aio_bh_poll and into the
callers.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-18-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
a153bf52b3 aio-posix: partially inline aio_dispatch into aio_poll
This patch prepares for the removal of unnecessary lockcnt inc/dec pairs.
Extract the dispatching loop for file descriptor handlers into a new
function aio_dispatch_handlers, and then inline aio_dispatch into
aio_poll.

aio_dispatch can now become void.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-17-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
b9e413dd37 block: explicitly acquire aiocontext in aio callbacks that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-16-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
1919631e6b block: explicitly acquire aiocontext in bottom halves that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-15-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
9d45665448 block: explicitly acquire aiocontext in callbacks that need it
This covers both file descriptor callbacks and polling callbacks,
since they execute related code.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-14-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:36 +00:00
Paolo Bonzini
2f47da5f7f block: explicitly acquire aiocontext in timers that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-13-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
0836c72f70 aio: push aio_context_acquire/release down to dispatching
The AioContext data structures are now protected by list_lock and/or
they are walked with FOREACH_RCU primitives.  There is no need anymore
to acquire the AioContext for the entire duration of aio_dispatch.
Instead, just acquire it before and after invoking the callbacks.
The next step is then to push it further down.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-12-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
b20123a28b qed: introduce qed_aio_start_io and qed_aio_next_io_cb
qed_aio_start_io and qed_aio_next_io will not have to acquire/release
the AioContext, while qed_aio_next_io_cb will.  Split the functionality
and gain a little type-safety in the process.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-11-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
e5c67ab552 blkdebug: reschedule coroutine on the AioContext it is running on
Keep the coroutine on the same AioContext.  Without this change,
there would be a race between yielding the coroutine and reentering it.
While the race cannot happen now, because the code only runs from a single
AioContext, this will change with multiqueue support in the block layer.

While doing the change, replace custom bottom half with aio_co_schedule.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-10-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
a9d9235567 coroutine-lock: reschedule coroutine on the AioContext it was running on
As a small step towards the introduction of multiqueue, we want
coroutines to remain on the same AioContext that started them,
unless they are moved explicitly with e.g. aio_co_schedule.  This patch
avoids that coroutines switch AioContext when they use a CoMutex.
For now it does not make much of a difference, because the CoMutex
is not thread-safe and the AioContext itself is used to protect the
CoMutex from concurrent access.  However, this is going to change.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-9-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
ff82911cd3 nbd: convert to use qio_channel_yield
In the client, read the reply headers from a coroutine, switching the
read side between the "read header" coroutine and the I/O coroutine that
reads the body of the reply.

In the server, if the server can read more requests it will create a new
"read request" coroutine as soon as a request has been read.  Otherwise,
the new coroutine is created in nbd_request_put.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-8-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
c4c497d27f io: make qio_channel_yield aware of AioContexts
Support separate coroutines for reading and writing, and place the
read/write handlers on the AioContext that the QIOChannel is registered
with.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
bf88c1247f io: add methods to set I/O handlers on AioContext
This is in preparation for making qio_channel_yield work on
AioContexts other than the main one.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
934ebf48c0 test-thread-pool: use generic AioContext infrastructure
Once the thread pool starts using aio_co_wake, it will also need
qemu_get_current_aio_context().  Make test-thread-pool create
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
and tests/iothread.c can provide the rest.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
35f106e684 block-backend: allow blk_prw from coroutine context
qcow2_create2 calls this.  Do not run a nested event loop, as that
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
list of the currently running one.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
0c330a734b aio: introduce aio_co_schedule and aio_co_wake
aio_co_wake provides the infrastructure to start a coroutine on a "home"
AioContext.  It will be used by CoMutex and CoQueue, so that coroutines
don't jump from one context to another when they go to sleep on a
mutex or waitqueue.  However, it can also be used as a more efficient
alternative to one-shot bottom halves, and saves the effort of tracking
which AioContext a coroutine is running on.

aio_co_schedule is the part of aio_co_wake that starts a coroutine
on a remote AioContext, but it is also useful to implement e.g.
bdrv_set_aio_context callbacks.

The implementation of aio_co_schedule is based on a lock-free
multiple-producer, single-consumer queue.  The multiple producers use
cmpxchg to add to a LIFO stack.  The consumer (a per-AioContext bottom
half) grabs all items added so far, inverts the list to make it FIFO,
and goes through it one item at a time until it's empty.  The data
structure was inspired by OSv, which uses it in the very code we'll
"port" to QEMU for the thread-safe CoMutex.

Most of the new code is really tests.
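
A self-contained C sketch of that queue shape (generic code, not QEMU's
implementation): producers cmpxchg nodes onto a LIFO head, the single
consumer swaps the head out and reverses the list to recover FIFO order.

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct Node { struct Node *next; } Node;
    static _Atomic(Node *) head;

    static void push(Node *n)                 /* any number of producers */
    {
        Node *old = atomic_load(&head);
        do {
            n->next = old;
        } while (!atomic_compare_exchange_weak(&head, &old, n));
    }

    static Node *pop_all_fifo(void)           /* single consumer */
    {
        Node *lifo = atomic_exchange(&head, NULL);
        Node *fifo = NULL;
        while (lifo) {                        /* reverse LIFO into FIFO */
            Node *next = lifo->next;
            lifo->next = fifo;
            fifo = lifo;
            lifo = next;
        }
        return fifo;
    }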

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
c2b38b277a block: move AioContext, QEMUTimer, main-loop to libqemuutil
AioContext is fairly self-contained; its only dependency is QEMUTimer, and
that in turn doesn't need anything else.  So move them out of block-obj-y
to avoid introducing a dependency from io/ to block-obj-y.

main-loop and its dependency iohandler also need to be moved, because
later in this series io/ will call iohandler_get_aio_context.

[Changed copyright "the QEMU team" to "other QEMU contributors" as
suggested by Daniel Berrange and agreed by Paolo.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Peter Maydell
b856256179 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170221-1' into staging
xhci: add qemu-xhci device, some followup cleanups.
ccid: better sanity checking.
ehci: fix memory leak
ohci: bugfixes.

# gpg: Signature made Tue 21 Feb 2017 07:14:35 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170221-1:
  usb-ccid: add check message size checks
  usb-ccid: move header size check
  usb-ccid: better bulk_out error handling
  xhci: drop via vendor command handling
  xhci: fix nec vendor quirk handling
  xhci: add qemu xhci controller
  xhci: drop ER_FULL_HACK workaround
  xhci: apply limits to loops
  usb: ohci: limit the number of link eds
  usb: ohci: fix error return code in servicing iso td
  usb: ehci: fix memory leak in ehci

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 09:35:15 +00:00
Gerd Hoffmann
31fb4444a4 usb-ccid: add check message size checks
Check the message size too when figuring out whether we should expect more
data.  Fix the debug message to show useful data: p->iov.size is fixed
anyway if we land there, so print how much we got so far instead.

Also check announced message size against actual message size.  That
is a more general fix for CVE-2017-5898 than commit "c7dfbf3 usb: ccid:
check ccid apdu length".

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
7569c54642 usb-ccid: move header size check
Move up the header size check, so we can use header fields in sanity checks
(in follow-up patches).  Also reword the debug message.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
0aeebc73b7 usb-ccid: better bulk_out error handling
Add an err goto label that we can jump to from all error conditions.
STALL the request on all errors.  Reset the position on all errors.

Normal request processing is not in an else branch any more, so that code
is reindented; there are no code changes in that part of the code, though.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
558ff1b6ef xhci: drop via vendor command handling
Seems pretty pointless; we don't emulate a VIA xhci controller.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-5-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
2992d6b49c xhci: fix nec vendor quirk handling
Only the TYPE_NEC_XHCI controller will have the nec vendor quirks.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
72a810f411 xhci: add qemu xhci controller
Turn existing TYPE_XHCI into an abstract base class.
Create two child classes, TYPE_NEC_XHCI (same name as old xhci
controller) and TYPE_QEMU_XHCI (using an ID from our namespace).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Message-id: 1486382139-30630-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
898248a329 xhci: drop ER_FULL_HACK workaround
The nec/renesas driver problems have finally been debugged and root
caused, see commit "7da76e1 xhci: fix event queue IRQ handling".

It's pretty clear now that
 (a) The whole "driver can't handle ring full" story is most likely
     wrong.
 (b) The ER_FULL_HACK workaround based on the false assumption doesn't
     help much.  It avoids the driver crashing (without commit 7da76e1),
     but it doesn't make usb work.
 (c) With 7da76e1 applied it doesn't trigger any more.

So, let's kill it.  Or, to be exact, let's almost kill it.  Some data
fields are kept unused in the state struct, for live migration backward
compatibility.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
f89b60f6e5 xhci: apply limits to loops
The limits should be big enough that a normal guest should not hit them.
Add a tracepoint to log them, just in case.  Also, while at it, log the
existing link trb limit too.

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486383669-6421-1-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Li Qiang
95ed56939e usb: ohci: limit the number of link eds
The guest may build an infinite loop with linked eds.  This patch
limits the number of linked eds to avoid this.
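
The guard is of this shape (a sketch; the counter and constant names are
assumptions, not the verbatim patch):

    /* sketch: inside the loop that walks the ed list */
    if (++link_cnt > ED_LINK_LIMIT) {
        return 0;   /* give up on this ed chain instead of looping forever */
    }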

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899a02e.45ca240a.6c373.93c1@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
26f670a244 usb: ohci: fix error return code in servicing iso td
It should return 1 if an error occurs when reading the iso td.
This avoids an infinite loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899ac3e.1033240a.944d5.9a2d@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
d710e1e7bd usb: ehci: fix memory leak in ehci
The usb_ehci_init function initializes 's->ipacket', but there
is no corresponding function to free it.  As the ehci device can be
hotplugged and unplugged, this leaks host memory.  In order to keep the
hierarchy clean, add an ehci pci finalize function and call the cleanup
function from the ehci device.
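
A hedged sketch of the shape of the fix (the type, macro, and cleanup
function names are assumptions based on the surrounding ehci code):

    static void usb_ehci_pci_finalize(Object *obj)
    {
        EHCIPCIState *i = PCI_EHCI(obj);

        /* undo what usb_ehci_init() allocated, e.g. s->ipacket */
        usb_ehci_finalize(&i->ehci);
    }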

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 589a85b8.3c2b9d0a.b8e6.1434@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Peter Maydell
56f9e46b84 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-20' into staging
QAPI patches for 2017-02-20

# gpg: Signature made Mon 20 Feb 2017 13:31:12 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-20:
  Makefile: Put VERSION info into version.texi rather than using -D
  qapi2texi: replace quotation by bold section name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 17:42:47 +00:00
Peter Maydell
c8f21dbfc3 Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170220-1' into staging
ui: opengl fixes, for spice and egl-helpers.

# gpg: Signature made Mon 20 Feb 2017 13:12:46 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170220-1:
  egl-helpers: Support newer MESA versions
  spice: allow to specify drm rendernode

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 16:31:38 +00:00
Peter Maydell
6753e4ed15 Merge remote-tracking branch 'remotes/kraxel/tags/pull-input-20170220-1' into staging
input: add wctablet, ps2 fix

# gpg: Signature made Mon 20 Feb 2017 11:42:12 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-input-20170220-1:
  Add wctablet device
  ps2: fix mouse mappings for right/middle button

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 13:38:34 +00:00
Peter Maydell
fea346f569 Makefile: Put VERSION info into version.texi rather than using -D
Unfortunately some older versions of makeinfo don't correctly
handle the -D command line option and fail to set the variable.
This then causes them to complain
 docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION

Work around this by doing as the autotools do, and writing
the information into a version.texi file which we then
include from the .texi files that need it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487357968-31000-1-git-send-email-peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:11:07 +01:00
Marc-André Lureau
1ede77dfd2 qapi2texi: replace quotation by bold section name
When we build qemu-qmp-ref.txt this causes texinfo to complain several
times:
"Negative repeat count does nothing at
/usr/share/texinfo/Texinfo/Convert/Line.pm line 124."

It also doesn't display correctly: in the HTML version the "Notes" text
disappears entirely, because texinfo thinks there is no actual quotation
text.

The text file output formatting is not good either.

To solve those problems, remove usage of @quotation, and simply use bold
face for the section name.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170217093416.27688-1-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:10:46 +01:00
Peter Maydell
5d42ff913b Merge remote-tracking branch 'remotes/huth/tags/coldfire-20170219' into staging
Updates for the m68k ColdFire machines:
- Remove the obsolete dummy machine
- QOMify the ColdFire interrupt controller
- Volunteer for maintaining the orphan ColdFire boards

# gpg: Signature made Sat 18 Feb 2017 23:08:55 GMT
# gpg:                using RSA key 0x2ED9D774FE702DB5
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>"
# gpg:                 aka "Thomas Huth <thuth@redhat.com>"
# gpg:                 aka "Thomas Huth <huth@tuxfamily.org>"
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3  EAB9 2ED9 D774 FE70 2DB5

* remotes/huth/tags/coldfire-20170219:
  MAINTAINERS: Add odd fixer for the ColdFire boards
  hw/m68k: QOMify the ColdFire interrupt controller
  hw/m68k: Remove dummy machine

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 11:55:37 +00:00
Frediano Ziglio
0ea1523fb6 egl-helpers: Support newer MESA versions
According to
https://www.khronos.org/registry/EGL/extensions/MESA/EGL_MESA_platform_gbm.txt
if MESA_platform_gbm is supported, the display should be initialized
from a GBM handle using eglGetPlatformDisplayEXT.

Signed-off-by: Frediano Ziglio <fziglio@redhat.com>
Message-id: 20170220095055.4234-1-fziglio@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:46:09 +01:00
Marc-André Lureau
7b5255083b spice: allow to specify drm rendernode
When multiple GPUs are available, picking the first one isn't always the
best choice. Allow specifying which device rendernode to use.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170212112118.16044-1-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:44:32 +01:00
Anatoli Huseu1
378af96155 Add wctablet device
Add QEMU Wacom Penpartner serial tablet emulation.
GSoC 2016 project.

Signed-off-by: Anatoli Huseu1 <avg.tolik@gmail.com>

Various cleanups.
Add line speed tracking.
Implement ST and SP commands.
Adapted to chardev QOMification.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486391007-10116-1-git-send-email-kraxel@redhat.com
2017-02-20 11:26:28 +01:00
Fabian Lesniak
ed6f72b827 ps2: fix mouse mappings for right/middle button
Commit 8b0caab0 ("ps2: add support for mice with extra/side buttons")
accidentally swapped right and middle mouse buttons. This commit corrects
the mapping as expected by the ps2 controller.

Signed-off-by: Fabian Lesniak <fabian@lesniak-it.de>
Message-id: 20170204150319.8907-1-fabian@lesniak-it.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 11:25:38 +01:00
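For reference, a standard PS/2 mouse packet encodes the buttons in byte 0
as bit 0 = left, bit 1 = right, bit 2 = middle; a rough sketch of the
intended mapping, with illustrative enum names rather than QEMU's input API:

    enum { BTN_LEFT, BTN_RIGHT, BTN_MIDDLE };

    /* Map an abstract button to its PS/2 packet bit. */
    static unsigned ps2_button_bit(int button)
    {
        switch (button) {
        case BTN_LEFT:   return 1u << 0;
        case BTN_RIGHT:  return 1u << 1;   /* right is bit 1 ...  */
        case BTN_MIDDLE: return 1u << 2;   /* ... middle is bit 2 */
        default:         return 0;
        }
    }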
Peter Maydell
d514cfd763 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pci: fixes, features

virtio is using region caches for performance
iommu support for IOTLBs
misc fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 17 Feb 2017 19:53:02 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (23 commits)
  intel_iommu: vtd_slpt_level_shift check level
  intel_iommu: convert dbg macros to trace for trans
  intel_iommu: convert dbg macros to traces for inv
  intel_iommu: renaming gpa to iova where proper
  intel_iommu: simplify irq region translation
  intel_iommu: add "caching-mode" option
  vfio: allow to notify unmap for very large region
  vfio: introduce vfio_get_vaddr()
  vfio: trace map/unmap for notify as well
  pcie: simplify pcie_add_capability()
  virtio: Fix no interrupt when not creating msi controller
  virtio: use VRingMemoryRegionCaches for avail and used rings
  virtio: check for vring setup in virtio_queue_update_used_idx
  virtio: use VRingMemoryRegionCaches for descriptor ring
  virtio: add MemoryListener to cache ring translations
  virtio: use MemoryRegionCache to access descriptors
  exec: make address_space_cache_destroy idempotent
  virtio: use address_space_map/unmap to access descriptors
  virtio: add virtio_*_phys_cached
  memory: make memory_listener_unregister idempotent
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 09:53:59 +00:00
Thomas Huth
5baf2741b4 MAINTAINERS: Add odd fixer for the ColdFire boards
I did some work with real ColdFire boards in the past, and after
QOMifying most of the ColdFire devices recently, I feel confident
that I could at least take care of odd fixes for these boards.

Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
Thomas Huth
88b86983f3 hw/m68k: QOMify the ColdFire interrupt controller
Use type_init() and friends to adapt the ColdFire interrupt
controller to the latest QEMU device conventions.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
Thomas Huth
22f2dbe7ea hw/m68k: Remove dummy machine
Since it is now possible to instantiate a CPU and RAM with the "none"
machine, too, and a kernel can be loaded there with the generic loader
device, there is no more need for the m68k "dummy" machine. Thus let's
remove this unmaintained file now.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:25 +01:00
Peter Xu
7e58326ad7 intel_iommu: vtd_slpt_level_shift check level
This helps when debugging an incorrect level being passed in.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
6c441e1d61 intel_iommu: convert dbg macros to trace for trans
Another patch to convert the DPRINTF() debug statements. This one focuses
on the address translation path and caching.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
bc535e59c4 intel_iommu: convert dbg macros to traces for inv
The VT-d code is still using the static DEBUG_INTEL_IOMMU macro. That's not
good: we should end the days when we need to recompile the code before
getting useful debugging information for VT-d. Time to switch to the trace
system. This is the first patch to do it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
6e9055641b intel_iommu: renaming gpa to iova where proper
There are lots of places in the current intel_iommu.c code that name an
"iova" as "gpa". Using the name "gpa" in these places is really confusing,
since it is easily read as "Guest Physical Address" when it is not one.
To make the code (much) easier to read, I decided to rename them once
and for all.

No functional change is made, only literal ones.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
046ab7e9be intel_iommu: simplify irq region translation
Now we have a standalone memory region for MSI, all the irq region
requests should be redirected there. Cleaning up the block with an
assertion instead.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Aviv Ben-David
3b40f0e53c intel_iommu: add "caching-mode" option
This capability asks the guest to invalidate cache before each map operation.
We can use this invalidation to trap map operations in the hypervisor.

Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
[peterx: using "caching-mode" instead of "cache-mode" to align with spec]
[peterx: re-write the subject to make it short and clear]
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
dfbd90e5b9 vfio: allow to notify unmap for very large region
The Linux vfio driver supports VFIO_IOMMU_UNMAP_DMA on a very big region.
The QEMU IOMMU implementation can leverage this to clean up existing page
mappings for an entire iova address space (by notifying with an IOTLB
carrying an extremely large addr_mask). However, the current
vfio_iommu_map_notify() does not allow that: it makes sure that every
translated address in the IOTLB falls into a RAM range.

The check makes sense, but it is only a sensible check for map
operations and means little for unmap operations.

This patch moves the check into the map logic only, so that we get
faster unmap handling (no need to translate again), and we can then
better support unmapping a very big region even when it covers non-RAM
or non-existent ranges.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
4a4b88fbe1 vfio: introduce vfio_get_vaddr()
A cleanup for vfio_iommu_map_notify(). Now we will fetch vaddr even if
the operation is unmap, but it won't hurt much.

One thing to mention is that we need the RCU read lock to protect the
whole translation and map/unmap procedure.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
3213835720 vfio: trace map/unmap for notify as well
We trace its range, but we don't know whether it's a MAP or an UNMAP. Let's
dump that as well.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
d4e9b75aa0 pcie: simplify pcie_add_capability()
When we add PCIe extended capabilities, we should be following the rule
that we add the head extended cap (at offset 0x100) first, then the rest
of them. Meanwhile, we are always adding new capability bits at the end
of the list. Here the "next" looks meaningless in all cases since it
should always be zero (along with the "header").

Simplify the function a bit, and it looks more readable now.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Michael S. Tsirkin
b4b9862b53 virtio: Fix no interrupt when not creating msi controller
For the ARM virt machine, if we use virt-2.7, which does not create an ITS
node, virtio-net cannot receive interrupts, so it can't get an IP address
through DHCP.
This fixes commit 83d768b (virtio: set ISR on dataplane notifications).

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
97cd965c07 virtio: use VRingMemoryRegionCaches for avail and used rings
The virtio-net change is necessary because it uses virtqueue_fill
and virtqueue_flush instead of the more convenient virtqueue_push.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
ca0176ad83 virtio: check for vring setup in virtio_queue_update_used_idx
If the vring has not been set up, it is not necessary for vring_used_idx
to do anything (as is already the case when the caller is virtio_load).
This is harmless for now, but it will be a problem when the
MemoryRegionCache has not been set up.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
991976f751 virtio: use VRingMemoryRegionCaches for descriptor ring
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
c611c76417 virtio: add MemoryListener to cache ring translations
The cached translations are RCU-protected to allow efficient use
when processing virtqueues.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
5eba0404b9 virtio: use MemoryRegionCache to access descriptors
For now, the cache is created on every virtqueue_pop.  Later on,
direct descriptors will be able to reuse it.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
91047df38d exec: make address_space_cache_destroy idempotent
Clear cache->mr so that address_space_cache_destroy does nothing
the second time it is called.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
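A generic sketch of the idempotent-destroy pattern described above; the
struct and field names are placeholders, not the real MemoryRegionCache:

    #include <stddef.h>

    typedef struct {
        void  *mapping;     /* stands in for cache->mr */
        size_t len;
    } Cache;

    static void cache_destroy(Cache *cache)
    {
        if (!cache->mapping) {
            return;         /* already destroyed: second call is a no-op */
        }
        /* ... release the mapping ... */
        cache->mapping = NULL;
        cache->len = 0;
    }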
Paolo Bonzini
9796d0ac8f virtio: use address_space_map/unmap to access descriptors
This makes little difference, but it makes the code change smaller
for the next patch that introduces MemoryRegionCache.  This is
because map/unmap are similar to MemoryRegionCache init/destroy.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
e6a830d6eb virtio: add virtio_*_phys_cached
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
1d8280c18f memory: make memory_listener_unregister idempotent
Make it easy to unregister a MemoryListener without tracking whether it
had been registered before.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Haozhong Zhang
79c0f397fe docs: add document to explain the usage of vNVDIMM
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Fam Zheng
0793169870 virtio: Report real progress in VQ aio poll handler
In virtio_queue_host_notifier_aio_poll, not all "!virtio_queue_empty()"
cases are making true progress.

Currently the offending one is virtio-scsi event queue, whose handler
does nothing if no event is pending. As a result aio_poll() will spin on
the "non-empty" VQ and take 100% host CPU.

Fix this by reporting actual progress from virtio queue aio handlers.

Reported-by: Ed Swierk <eswierk@skyportsystems.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Tested-by: Ed Swierk <eswierk@skyportsystems.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
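A sketch of the idea, with made-up types and helpers: report progress only
when work was actually done, not merely when the queue looks non-empty:

    #include <stdbool.h>

    typedef struct { int pending; } EventQueue;     /* placeholder */

    /* Hypothetical helper: handle one event if any is pending. */
    static bool handle_one_event(EventQueue *q)
    {
        if (!q->pending) {
            return false;
        }
        q->pending--;
        return true;
    }

    /* aio_poll() keeps spinning while the handler claims progress, so
     * return true only if something was really processed. */
    static bool queue_aio_poll(EventQueue *q)
    {
        bool progress = false;

        while (handle_one_event(q)) {
            progress = true;
        }
        return progress;
    }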
Michael S. Tsirkin
4bb571d857 pci/pcie: don't assume cap id 0 is reserved
VFIO actually wants to create a capability with ID == 0.
This is done to make guest drivers skip the given capability.
pcie_add_capability then trips up on this capability
when looking for end of capability list.

To support this use-case, it's easy enough to switch to
e.g. 0xffffffff for these comparisons - we can be sure
it will never match a 16-bit capability ID.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-17 21:52:30 +02:00
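An illustrative (non-QEMU) sketch of the sentinel change: use a value that
can never collide with a 16-bit capability ID to mean "nothing found", so a
real capability with ID 0 is not mistaken for an empty slot:

    #include <stdint.h>

    #define CAP_NONE 0xffffffffu  /* cannot match any 16-bit cap ID, even 0 */

    /* Return the matching 32-bit capability header, or CAP_NONE. */
    static uint32_t find_ext_cap(const uint32_t *headers, int n,
                                 uint16_t want_id)
    {
        for (int i = 0; i < n; i++) {
            if ((headers[i] & 0xffff) == want_id) { /* low 16 bits: cap ID */
                return headers[i];
            }
        }
        return CAP_NONE;    /* distinct from a real capability with ID 0 */
    }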
Peter Maydell
ad584d37f2 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* GUEST_PANICKED improvements (Anton)
* vCont gdbstub rewrite (Claudio)
* Fix CPU creation with -device (Liyang)
* Logging fixes for pty chardevs (Ed)
* Makefile "move if changed" fix (Lin)
* First part of cpu_exec refactoring (me)
* SVM emulation fix (me)
* apic_delivered fix (Pavel)
* "info ioapic" fix (Peter)
* qemu-nbd socket activation (Richard)
* QOMification of mcf_uart (Thomas)

# gpg: Signature made Thu 16 Feb 2017 17:37:31 GMT
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (23 commits)
  target-i386: correctly propagate retaddr into SVM helpers
  vl: log available guest crash information
  report guest crash information in GUEST_PANICKED event
  i386/cpu: add crash-information QOM property
  Makefile: avoid leaving the temporary QEMU_PKGVERSION header file
  vl: Move the cpu_synchronize_all_post_init() after generic devices initialization
  qemu-nbd: Implement socket activation.
  qemu-doc: Clarify that -vga std is now the default
  cpu-exec: remove outermost infinite loop
  cpu-exec: avoid repeated sigsetjmp on interrupts
  cpu-exec: avoid cpu_loop_exit in cpu_handle_interrupt
  cpu-exec: tighten barrier on TCG_EXIT_REQUESTED
  cpu-exec: fix icount out-of-bounds access
  hw/char/mcf_uart: QOMify the ColdFire UART
  gdbstub: Fix vCont behaviour
  move vm_start to cpus.c
  char: drop data written to a disconnected pty
  apic: reset apic_delivered global variable on machine reset
  qemu-char: socket backend: disconnect on write error
  test-vmstate: remove yield_until_fd_readable
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-16 17:46:52 +00:00
Paolo Bonzini
65c9d60a3a target-i386: correctly propagate retaddr into SVM helpers
Commit 2afbdf8 ("target-i386: exception handling for memory helpers",
2015-09-15) changed tlb_fill's cpu_restore_state+raise_exception_err
to raise_exception_err_ra.  After this change, the cpu_restore_state
and raise_exception_err's cpu_loop_exit are merged into
raise_exception_err_ra's cpu_loop_exit_restore.

This actually fixed some bugs, but when SVM is enabled there is a
second path from raise_exception_err_ra to cpu_loop_exit.  This is
the VMEXIT path, and now cpu_vmexit is called without a
cpu_restore_state before.

The fix is to pass the retaddr to cpu_vmexit (via
cpu_svm_check_intercept_param).  All helpers can now use GETPC() to pass
the correct retaddr, too.

Cc: qemu-stable@nongnu.org
Fixes: 2afbdf8480
Reported-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Tested-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 18:37:01 +01:00
Peter Maydell
7a37b59f1d Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-upstream-pull-request' into staging
# gpg: Signature made Thu 16 Feb 2017 14:35:46 GMT
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier2/tags/linux-user-for-upstream-pull-request:
  linux-user: Add FICLONE and FICLONERANGE ioctls
  linux-user: Use correct types in load_symbols()
  linux-user: fill target sigcontext struct accordingly
  linux-user: fix tcg/mmap test
  linux-user: fix settime old value location
  linux-user: Update m68k syscall definitions to match Linux 4.6
  linux-user: Update sh4 syscall definitions to match Linux 4.8
  linux-user: manage two new IFLA host message types
  linux-user: Fix mq_open
  linux-user: Fix readahead
  linux-user: Fix inotify_init1 support
  linux-user: Fix s390x safe-syscall for z900
  linux-user: drop __cygwin__ ifdef
  linux-user: remove ifdef __USER_MISC

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-16 15:03:28 +00:00
Anton Nefedov
f47291b7a7 vl: log available guest crash information
There is a suitable log mask for the purpose.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-Id: <1487053524-18674-4-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Anton Nefedov
c86f106b85 report guest crash information in GUEST_PANICKED event
It's not very convenient to use the crash-information property interface,
so provide a CPU class callback to get the guest crash information, and pass
that information in the event.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-Id: <1487053524-18674-3-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Anton Nefedov
d187e08dc4 i386/cpu: add crash-information QOM property
Windows reports BSOD parameters through Hyper-V crash MSRs. This
information is very useful for initial crash analysis and thus
it would be nice to have a way to fetch it.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-Id: <1487053524-18674-2-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Lin Ma
d9e73d32a8 Makefile: avoid leaving the temporary QEMU_PKGVERSION header file
Since commit 67a1de0d, when we perform 'git pull && make && sudo make install',
a qemu-version.h.tmp is generated in the 'make' stage. If the contents of
qemu-version.h.tmp and qemu-version.h aren't consistent, qemu-version.h.tmp
is renamed to qemu-version.h. Because of the FORCE target, the same action
is done again in the 'make install' stage.

In the 'make install' stage, if no qemu-version.h.tmp exists and we run
'make install' with sudo, the owner and group of the new qemu-version.h.tmp
will be the privileged user/group. The next time we run 'make',
qemu-version.h.tmp can't be overwritten because of the permission issue.

This patch removes qemu-version.h.tmp after the build to fix this issue.

Signed-off-by: Lin Ma <lma@suse.com>
Message-Id: <20170215024030.23895-1-lma@suse.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Dou Liyang
3741c2503b vl: Move the cpu_synchronize_all_post_init() after generic devices initialization
During QEMU initialization, we call cpu_synchronize_all_post_init() in
./vl.c::main() to synchronize all CPU states to KVM.

Currently, it is called before we initialize the CPUs that are created with
the "-device" option and parsed during generic device initialization, so
those CPUs may be skipped during synchronization.

The patch moves the cpu_synchronize_all_post_init() call after generic
device initialization to make sure that all the CPUs are included.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Message-Id: <1485916178-17838-1-git-send-email-douly.fnst@cn.fujitsu.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Acked-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Richard W.M. Jones
a721f53b8f qemu-nbd: Implement socket activation.
Socket activation (sometimes known as systemd socket activation)
allows an Internet superserver to pass a pre-opened listening socket
to the process, instead of having qemu-nbd open a socket itself.  This
is done via the LISTEN_FDS and LISTEN_PID environment variables, and a
standard file descriptor range.

This change partially implements socket activation for qemu-nbd.  If
the environment variables are set correctly, then socket activation
will happen automatically, otherwise everything works as before.  The
limitation is that LISTEN_FDS must be 1.

Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
Message-Id: <20170204100317.32425-2-rjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:45 +01:00
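A hedged sketch of the detection logic; LISTEN_PID/LISTEN_FDS and the
convention that the first passed socket is fd 3 come from the systemd
socket-activation protocol, while the function itself is illustrative:

    #include <stdlib.h>
    #include <unistd.h>

    #define FIRST_SOCKET_ACTIVATION_FD 3    /* fds 0-2 are stdio */

    /* Return the pre-opened listening socket, or -1 if not activated. */
    static int socket_activation_fd(void)
    {
        const char *pid = getenv("LISTEN_PID");
        const char *fds = getenv("LISTEN_FDS");

        if (!pid || !fds) {
            return -1;                      /* started normally */
        }
        if (atol(pid) != (long)getpid() || atoi(fds) != 1) {
            return -1;                      /* not for us, or >1 fd (unsupported) */
        }
        return FIRST_SOCKET_ACTIVATION_FD;
    }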
Helge Deller
21992cb679 linux-user: Add FICLONE and FICLONERANGE ioctls
Add missing FICLONE and FICLONERANGE ioctls.

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170211222602.GA6399@ls3530.fritz.box>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
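For context, the host-side ioctl being forwarded can be used roughly like
this (FICLONE reflinks the whole of src_fd into dest_fd on filesystems that
support it):

    #include <sys/ioctl.h>
    #include <linux/fs.h>

    /* Returns 0 on success, -1 with errno set on failure. */
    static int clone_file(int dest_fd, int src_fd)
    {
        return ioctl(dest_fd, FICLONE, src_fd);
    }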
Peter Maydell
1e06262da6 linux-user: Use correct types in load_symbols()
Coverity doesn't like the code in load_symbols() which assumes
it can use 'int' for a variable that might hold an offset into
the guest ELF file, because in a 64-bit guest that could
overflow. Guest binaries with 2GB sections aren't very likely
and this isn't a security issue because we fully trust the
guest linux-user binary anyway, but we might as well use the
right types, which will placate Coverity. Use uint64_t to
hold section sizes, and bail out if the symbol table is too
large rather than just overflowing an int.

(Coverity issue CID1005776)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1486249533-5260-1-git-send-email-peter.maydell@linaro.org>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
Jose Ricardo Ziviani
26920a2961 linux-user: fill target sigcontext struct accordingly
A segfault is observed when an emulated program uses any of the ucontext
regs fields. Risu detected this issue in the following operation when
handling a signal:
  ucontext_t *uc = (ucontext_t*)uc;
  uc->uc_mcontext.regs->nip += 4;

but this works fine:
  uc->uc_mcontext.gp_regs[PT_NIP] += 4;

This patch sets regs to a valid location, as well as the other sigcontext
fields.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <1485900317-3256-1-git-send-email-joserz@linux.vnet.ibm.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
Marc-André Lureau
35f2fd04ce linux-user: fix tcg/mmap test
tests/tcg/mmap test fails with values other than default target page
size. When creating a map beyond EOF, extra anonymous pages are added up
to the target page boundary. Currently, this operation is performed only
when qemu_real_host_page_size < TARGET_PAGE_SIZE, but it should be
performed if the configured page size (qemu -p) is larger than
qemu_real_host_page_size too.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[pranith: dropped checkpatch changes]
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170119151533.29328-2-bobby.prani@gmail.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
Marc-André Lureau
40c80b5e9e linux-user: fix settime old value location
old_value is the 4th argument of timer_settime(), not the 2nd.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170119151533.29328-1-bobby.prani@gmail.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
John Paul Adrian Glaubitz
23d208ce6d linux-user: Update m68k syscall definitions to match Linux 4.6
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170116224915.19430-2-glaubitz@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:26 +01:00
John Paul Adrian Glaubitz
3148ff8404 linux-user: Update sh4 syscall definitions to match Linux 4.8
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170116223140.18634-2-glaubitz@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:16 +01:00
Alberto Garcia
41eeb0e601 qemu-doc: Clarify that -vga std is now the default
The QEMU manual page states that Cirrus Logic is the default video
card if the user doesn't specify any. However this is not true since
QEMU 2.2.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-Id: <20170127094154.19778-1-berto@igalia.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
4515e58d60 cpu-exec: remove outermost infinite loop
Reorganize the sigsetjmp so that the restart case falls through
to cpu_handle_exception and the execution loop.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
a42cf3f3f2 cpu-exec: avoid repeated sigsetjmp on interrupts
The sigsetjmp only needs to be prepared once for the whole execution
of cpu_exec.  This patch takes care of the "== 0" side, using a
nested loop so that cpu_handle_interrupt goes straight back to
cpu_handle_exception without doing another sigsetjmp.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
209b71b60e cpu-exec: avoid cpu_loop_exit in cpu_handle_interrupt
The siglongjmp goes straight back to the beginning of cpu_exec's
outermost loop.  We do not need a siglongjmp, we can simply
leave the inner TB execution loop.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
a70fe14b7d cpu-exec: tighten barrier on TCG_EXIT_REQUESTED
This seems to have worked just fine so far on weakly-ordered
architectures, but I don't see anything that prevents the
reordering from:

    store 1 to exit_request
    store 1 to tcg_exit_req
                                 load tcg_exit_req
                                 store 0 to tcg_exit_req
                                 load exit_request
                                 store 0 to exit_request
    store 1 to exit_request
    store 1 to tcg_exit_req

to this:

    store 1 to exit_request
    store 1 to tcg_exit_req
                                 load tcg_exit_req
                                 load exit_request
    store 1 to exit_request
    store 1 to tcg_exit_req
                                 store 0 to tcg_exit_req
                                 store 0 to exit_request

therefore losing a request.  It's possible that other memory barriers
(e.g. in rcu_read_unlock) are hiding it, but better safe than
sorry.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
43d70ddf9f cpu-exec: fix icount out-of-bounds access
When icount is active, tb_add_jump is surprisingly called with an
out of bounds basic block index.  I have no idea how that can work,
but it does not seem like a good idea.  Clear *last_tb for all
TB_EXIT_ICOUNT_EXPIRED cases, even when all you have to do is
refill icount_extra.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Thomas Huth
d9ff1d35c5 hw/char/mcf_uart: QOMify the ColdFire UART
Use type_init() etc. to adapt the ColdFire UART
to the latest QEMU device conventions.

Signed-off-by: Thomas Huth <huth@tuxfamily.org>
Message-Id: <1485586582-6490-1-git-send-email-huth@tuxfamily.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Claudio Imbrenda
544177ad1c gdbstub: Fix vCont behaviour
When GDB issues a "vCont" packet, QEMU did not handle it correctly when
multiple VCPUs are active.
For vCont, for each thread (VCPU), it can be specified whether to
single step, continue or stop that thread. The default is to stop a
thread.
However, when (for example) "vCont;s:2" is issued, all VCPUs continue
to run, although all but VCPU nr 2 are to be stopped.

This patch completely rewrites the vCont parsing code.

Please note that this improvement only applies in system emulation mode;
in userspace emulation mode the old behaviour is preserved.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Message-Id: <1487092068-16562-3-git-send-email-imbrenda@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Claudio Imbrenda
2d76e82395 move vm_start to cpus.c
This patch:

* moves vm_start to cpus.c.
* exports qemu_vmstop_requested, since it's needed by vm_start.
* extracts vm_prepare_start from vm_start; it does what vm_start did,
  except restarting the cpus.
* vm_start now calls vm_prepare_start and then restarts the cpus.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Message-Id: <1487092068-16562-2-git-send-email-imbrenda@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Ed Swierk
1c64fdbc81 char: drop data written to a disconnected pty
When a serial port writes data to a pty that's disconnected, drop the
data and return the length dropped. This avoids triggering pointless
retries in callers like the 16550A serial_xmit(), and causes
qemu_chr_fe_write() to write all data to the log file, rather than
logging only while a pty client like virsh console happens to be
connected.

Signed-off-by: Ed Swierk <eswierk@skyportsystems.com>
Message-Id: <1485870329-79428-1-git-send-email-eswierk@skyportsystems.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
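A minimal sketch of the behaviour described above, with placeholder types
rather than the actual chardev API: when no client is connected, log the
data and claim it was written so callers do not retry:

    #include <stdbool.h>
    #include <stdint.h>
    #include <unistd.h>

    typedef struct {
        int  fd;
        bool connected;
    } PtyChardev;                               /* placeholder */

    /* Hypothetical helper: append the data to the chardev log file. */
    static void log_chardev_write(PtyChardev *p, const uint8_t *buf, int len)
    {
        (void)p; (void)buf; (void)len;
    }

    static int pty_chr_write(PtyChardev *p, const uint8_t *buf, int len)
    {
        log_chardev_write(p, buf, len);         /* always log */
        if (!p->connected) {
            return len;                         /* drop: no retries upstream */
        }
        return (int)write(p->fd, buf, len);
    }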
Pavel Dovgalyuk
f65e821262 apic: reset apic_delivered global variable on machine reset
This patch adds call to apic_reset_irq_delivered when the virtual
machine is reset.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <20170131114054.276.62201.stgit@PASHA-ISP>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Anton Nefedov
b0a335e351 qemu-char: socket backend: disconnect on write error
Socket backend read handler should normally perform a disconnect, however
the read handler may not get a chance to run if the frontend is not ready
(qemu_chr_be_can_write() == 0).

This means that in the virtio-serial frontend case, if
 - the host has disconnected (giving EPIPE on socket write)
 - and the guest has disconnected (-> frontend not ready -> backend
   will not read)
 - and there is still data (frontend->backend) to flush (this takes really
   tricky timing, but nevertheless we have observed the case in production)

then virtio-serial keeps trying to flush this data continuously, forming
a busy loop.

Solution: react to write errors in the socket write handler.
errno is not reliable after qio_channel_writev_full(), so we may not get
the exact EPIPE; disconnect on any error except QIO_CHANNEL_ERR_BLOCK, which
io_channel_send_full() converts to errno EAGAIN.
We must not disconnect right away, though; there may still be data to read
(see 4bf1cb0).

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Daniel P. Berrange <berrange@redhat.com>
CC: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1486045589-8074-1-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
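A sketch of the policy described above, using plain write() and made-up
names instead of the QIOChannel API: treat any write error other than
"would block" as a broken connection, but only schedule the disconnect so
pending input can still be drained:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <unistd.h>

    typedef struct {
        int  fd;
        bool disconnect_pending;
    } SocketChardev;                            /* placeholder type */

    static void schedule_disconnect(SocketChardev *s)
    {
        s->disconnect_pending = true;           /* handled after reads drain */
    }

    static ssize_t tcp_chr_write(SocketChardev *s, const uint8_t *buf,
                                 size_t len)
    {
        ssize_t ret = write(s->fd, buf, len);

        /* Any error except "would block" means the peer is effectively gone. */
        if (ret < 0 && errno != EAGAIN) {
            schedule_disconnect(s);
        }
        return ret;
    }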
Paolo Bonzini
a3fd46152e test-vmstate: remove yield_until_fd_readable
The function is not needed anymore now that migration is built on
top of QIOChannel.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Xu
b7a4104b73 kvm/ioapic: correct kvm ioapic version
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1486106298-3699-4-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Xu
8d5516be12 ioapic: fix error report value of def version
It should be 0x20, rather than 0x11.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1486106298-3699-3-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Xu
c6fcb0e201 kvm/ioapic: dump real object instead of a fake one
When we do "info ioapic" for kvm ioapic, we were building up a temporary
ioapic object. Let's fetch the real one and update correspond to the
real object as well.

This fixes printing uninitialized version field in
ioapic_print_redtbl().

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1486106298-3699-2-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Maydell
ca5266de6c Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Wed 15 Feb 2017 03:46:59 GMT
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  net: e1000e: fix an infinite loop issue
  net: imx: limit buffer descriptor count
  colo-compare: sort TCP packet queue by sequence number
  net: e1000e: fix dead code in e1000e_write_packet_to_guest
  net: Mark 'vlan' parameter as deprecated

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-16 12:36:24 +00:00
Li Qiang
4154c7e03f net: e1000e: fix an infinite loop issue
This issue is like the one in the e1000 network card that was addressed in
this commit:
e1000: eliminate infinite loops on out-of-bounds transfer start.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
Prasad J Pandit
81f17e0d43 net: imx: limit buffer descriptor count
The i.MX Fast Ethernet Controller uses buffer descriptors to manage
data flow to and from the receive and transmit queues. While transmitting
packets, it could continue to read buffer descriptors indefinitely if a
buffer descriptor has a length of zero and crafted values in bd.flags.
Set an upper limit on the number of buffer descriptors processed.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
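A generic sketch of the mitigation; the limit and helper names are invented
for illustration: bound the descriptor walk so crafted zero-length
descriptors cannot keep the loop going forever:

    #include <stdbool.h>
    #include <stdint.h>

    #define TX_BD_LIMIT 128       /* assumed cap, not the device's real value */

    typedef struct { uint16_t len; uint16_t flags; } BufDesc;  /* placeholder */

    /* Hypothetical helper: fetch the descriptor at addr, false on failure. */
    static bool fetch_bd(uint32_t addr, BufDesc *bd)
    {
        (void)addr; (void)bd;
        return true;
    }

    static void tx_ring_walk(uint32_t addr)
    {
        BufDesc bd;

        for (int i = 0; i < TX_BD_LIMIT; i++) {  /* bounded, not while (1) */
            if (!fetch_bd(addr, &bd)) {
                break;
            }
            /* ... transmit bd ... */
            addr += sizeof(bd);
        }
    }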
Zhang Chen
a935cc3132 colo-compare: sort TCP packet queue by sequence number
Improve efficiency of TCP packet comparison.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
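A wrap-around-safe comparison of TCP sequence numbers of the kind such
sorting needs (illustrative, not the colo-compare code):

    #include <stdint.h>

    /* Negative if a precedes b, positive if it follows, 0 if equal;
     * the signed subtraction handles 32-bit sequence wrap-around. */
    static int tcp_seq_cmp(uint32_t a, uint32_t b)
    {
        int32_t d = (int32_t)(a - b);
        return (d > 0) - (d < 0);
    }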
Paolo Bonzini
e514fc7e12 net: e1000e: fix dead code in e1000e_write_packet_to_guest
Because is_first is declared inside a loop, it is always true.  The store
is dead, and so is the "else" branch of "if (is_first)".  is_last is
okay though.

Reported by Coverity.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
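A generic illustration of the bug class being fixed here: a flag declared
inside the loop is re-initialized every iteration, so its final store is
dead and the "else" branch can never run; hoisting it out restores the
intended behaviour:

    #include <stdbool.h>
    #include <stdio.h>

    static void write_packets(int count)
    {
        bool is_first = true;               /* hoisted out of the loop */

        for (int i = 0; i < count; i++) {
            if (is_first) {
                printf("first fragment\n");
            } else {
                printf("continuation\n");   /* reachable only when hoisted */
            }
            is_first = false;
        }
    }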
Thomas Huth
a2dbe1356f net: Mark 'vlan' parameter as deprecated
The 'vlan' parameter is a continuous source of confusion for the users,
many people mix it up with the more common term VLAN (the link layer
packet encapsulation), and even if they realize that the QEMU 'vlan' is
rather some kind of network hub emulation, there is still a high risk
that they configure their QEMU networking in a wrong way with this
parameter (e.g. by hooking NICs together, so they get a 'loopback'
between one and the other NIC).
Thus at one point in time, we should finally get rid of the 'vlan'
feature in QEMU. Let's do a first step in this direction by declaring
the 'vlan' parameter as deprecated and informing the users to use the
'netdev' parameter instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
Laurent Vivier
a1488b8661 linux-user: manage two new IFLA host message types
Add QEMU_IFLA_GSO_MAX_SEGS and QEMU_IFLA_GSO_MAX_SIZE
in host_to_target_data_link_rtattr().

These two messages are sent by the host kernel when
we use "sudo".

Found with qemu-m68k and Debian etch-m68k (sudo 1.6.8p12-4) and
host kernel 4.7.6-200.fc24.x86_64

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <1477530049-15676-1-git-send-email-laurent@vivier.eu>
2017-02-14 18:08:11 +01:00
Lena Djokic
2640077527 linux-user: Fix mq_open
If the fourth argument is NULL, it should be passed through without using
the lock_user function, which would return EFAULT in that case; the system
call supports passing NULL as its fourth argument.

Signed-off-by: Lena Djokic <Lena.Djokic@rt-rk.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Lena Djokic
77c6850fd7 linux-user: Fix readahead
The calculation of the 64-bit offset was not correct in all cases.

Signed-off-by: Lena Djokic <Lena.Djokic@rt-rk.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Lena Djokic
fea243e90a linux-user: Fix inotify_init1 support
This commit adds the necessary conversion of the argument passed to
inotify_init1. The inotify_init1 flags can be IN_NONBLOCK and IN_CLOEXEC,
which rely on O_NONBLOCK and O_CLOEXEC, and those can have different values
on different platforms.

Signed-off-by: Lena Djokic <Lena.Djokic@rt-rk.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Richard Henderson
6cde51769e linux-user: Fix s390x safe-syscall for z900
The LT instruction was added in the extended immediate facility
introduced with the z9-109 processor.

Cc: Riku Voipio <riku.voipio@iki.fi>
Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Fixes: c9bc3437a9
Suggested-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Riku Voipio
5fbf66e6a1 linux-user: drop __cygwin__ ifdef
linux-user doesn't work on cygwin anyway.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Riku Voipio
b9a0be9239 linux-user: remove ifdef __USER_MISC
This preprocessor macro isn't set anywhere. Remove
the check so -strace can show these options.

Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Peter Maydell
5dae13cd71 Merge remote-tracking branch 'remotes/rth/tags/pull-or-20170214' into staging
Queued openrisc patches

# gpg: Signature made Mon 13 Feb 2017 21:21:03 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-or-20170214: (24 commits)
  target/openrisc: Optimize for r0 being zero
  target/openrisc: Tidy handling of delayed branches
  target/openrisc: Tidy ppc/npc implementation
  target/openrisc: Optimize l.jal to next
  target/openrisc: Fix madd
  target/openrisc: Implement muld, muldu, macu, msbu
  target/openrisc: Represent MACHI:MACLO as a single unit
  target/openrisc: Implement msync
  target/openrisc: Enable trap, csync, msync, psync for user mode
  target/openrisc: Set flags on helpers
  target/openrisc: Use movcond where appropriate
  target/openrisc: Keep SR_CY and SR_OV in a separate variables
  target/openrisc: Keep SR_F in a separate variable
  target/openrisc: Invert the decoding in dec_calc
  target/openrisc: Put SR[OVE] in TB flags
  target/openrisc: Streamline arithmetic and OVE
  target/openrisc: Rationalize immediate extraction
  target/openrisc: Tidy insn dumping
  target/openrisc: Implement lwa, swa
  target/openrisc: Fix exception handling status registers
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-14 09:55:48 +00:00
Richard Henderson
6597c28d61 target/openrisc: Optimize for r0 being zero
The HW does not special-case r0, but the ABI specifies that r0 should
contain 0.  If we expose this fact to the optimizer, we can simplify
a lot of the generated code.  We must of course verify that r0==0, but
that is trivial to do with a TB flag.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
a01deb36a6 target/openrisc: Tidy handling of delayed branches
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
24c328521b target/openrisc: Tidy ppc/npc implementation
The NPC SPR is really only supposed to be used for FPGA debugging.
It contains the same contents as PC, unless one plays games.  Follow
the or1ksim implementation in flushing delayed branch state when it
is changed.

The PPC SPR need not be updated every instruction, merely when we
exit the TB or attempt to read its contents.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
a8000cb480 target/openrisc: Optimize l.jal to next
This allows the tcg optimizer to see, and fold, all of the
constants involved in a GOT base register load sequence.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
762e22edcd target/openrisc: Fix madd
Note that the specification for lf.madd.s is confused.  It's
the only mention of supposed FPMADDHI/FPMADDLO special registers.
On the other hand, or1ksim implements a somewhat normal non-fused
multiply and add.  Mirror that.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
cc5de49ebe target/openrisc: Implement muld, muldu, macu, msbu
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
6f7332ba71 target/openrisc: Represent MACHI:MACLO as a single unit
Significantly simplifies the implementation of the use of MAC.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
24fc5c0feb target/openrisc: Implement msync
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
20dc52a37c target/openrisc: Enable trap, csync, msync, psync for user mode
Not documented as disabled for user mode.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
9fba702bd4 target/openrisc: Set flags on helpers
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
784696d119 target/openrisc: Use movcond where appropriate
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
9745807191 target/openrisc: Keep SR_CY and SR_OV in a separate variables
This significantly streamlines carry and overflow production.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
84775c43f3 target/openrisc: Keep SR_F in a separate variable
This avoids having to keep merging and extracting the flag from SR.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
cf2ae4428f target/openrisc: Invert the decoding in dec_calc
Decoding the opcodes in the right order reduces the code by 100+ lines.
Also, it happens to put the opcodes in the same order as Chapter 17.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
0c53d7342b target/openrisc: Put SR[OVE] in TB flags
Removes a call at execution time for overflow exceptions.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
9ecaa27e71 target/openrisc: Streamline arithmetic and OVE
Fix incorrect overflow calculation.  Move overflow exception check
to a helper function, to eliminate inline branches.  Remove some
incorrect special casing of R0.  Implement multiply inline.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
6da544a6c4 target/openrisc: Rationalize immediate extraction
The architecture manual is consistent in using "I" for signed
fields and "K" for unsigned fields.  Mirror that.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
111ece5133 target/openrisc: Tidy insn dumping
Avoids warnings from unused variables etc.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
930c3d0074 target/openrisc: Implement lwa, swa
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Stafford Horne
c56e3b8670 target/openrisc: Fix exception handling status registers
I am working on testing instruction emulation patches for the linux
kernel. During testing I found these 2 issues:

 - sets DSX (delay slot exception) but never clears it
 - EEAR for illegal insns should point to the bad exception (as per the
   openrisc spec) but it's not

This patch fixes these two issues by clearing the DSX flag when not in a
delay slot and by setting EEAR to exception PC when handling illegal
instruction exceptions.

After this patch the openrisc kernel with latest patches boots great on
qemu and instruction emulation works.

Cc: qemu-trivial@nongnu.org
Cc: openrisc@lists.librecores.org
Signed-off-by: Stafford Horne <shorne@gmail.com>
Message-Id: <20170113220028.29687-1-shorne@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
c40413a65e linux-user: Honor CLONE_SETTLS for openrisc
Threads work much better when you set the TLS register.
This was fixed in the upstream kernel for Linux 4.9.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
a0adc417a0 linux-user: Fix openrisc cpu_loop
We need to handle EXCP_DEBUG and EXCP_INTERRUPT.
We need to send signals to the guest using queue_signal.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:58 +11:00
Richard Henderson
ab90233855 linux-user: Add MMAP_SHIFT for openrisc
The page size on openrisc is 8k.  Sync the shift
required for the mmap2 syscall.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:58 +11:00
Richard Henderson
4a09d0bb34 target/openrisc: Rename the cpu from or32 to or1k
This is in keeping with the toolchain and or1ksim.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:58 +11:00
Peter Maydell
ec7a9bd5bb Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170213a' into staging
Migration

  Amit: migration: remove myself as maintainer
        MAINTAINERS: update my email address
  Ashijeet: migrate: Introduce zero RAM checks to skip RAM migration
  Pavel: Postcopy release RAM
  Halil: consolidate VMStateField.start
  Hailiang: COLO: fix setting checkpoint-delay not working properly
         COLO: Shutdown related socket fd while do failover
         COLO: Don't process failover request while loading VM's state
  Me:
     migration: Add VMSTATE_UNUSED_VARRAY_UINT32
     migration: Add VMSTATE_WITH_TMP
     tests/migration: Add test for VMSTATE_WITH_TMP
     virtio-net VMState conversion and new VMSTATE macros

# gpg: Signature made Mon 13 Feb 2017 17:36:39 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20170213a:
  virtio/migration: Migrate virtio-net to VMState
  tests/migration: Add test for VMSTATE_WITH_TMP
  migration: Add VMSTATE_WITH_TMP
  migration: Add VMSTATE_UNUSED_VARRAY_UINT32
  COLO: Don't process failover request while loading VM's state
  COLO: Shutdown related socket fd while do failover
  COLO: fix setting checkpoint-delay not working properly
  migration: consolidate VMStateField.start
  migrate: Introduce zero RAM checks to skip RAM migration
  migration: discard non-dirty ram pages after the start of postcopy
  add 'release-ram' migrate capability
  migration: add MigrationState arg for ram_save_/compressed_/page()
  MAINTAINERS: update my email address
  migration: remove myself as maintainer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 18:49:26 +00:00
Dr. David Alan Gilbert
982b78c5e3 virtio/migration: Migrate virtio-net to VMState
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20170203160651.19917-5-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  Merge fix against Halil's removal of the '_start' field in
    VMSTATE_VBUFFER_MULTIPLY
2017-02-13 17:27:14 +00:00
Dr. David Alan Gilbert
5c379d9031 tests/migration: Add test for VMSTATE_WITH_TMP
Add a test for VMSTATE_WITH_TMP to tests/test-vmstate.c

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170203160651.19917-4-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:14 +00:00
Dr. David Alan Gilbert
bcf4513129 migration: Add VMSTATE_WITH_TMP
VMSTATE_WITH_TMP is for handling structures where some calculation
or rearrangement of the data needs to be performed before the data
hits the wire.
For example,  where the value on the wire is an offset from a
non-migrated base, but the data in the structure is the actual pointer.

To use it, a temporary type is created and a vmsd is used on that type.
The first element of the type must be 'parent', a pointer back to the
type of the main structure.  VMSTATE_WITH_TMP takes care of allocating
and freeing the temporary before running the child vmsd.

The post_load/pre_save on the child vmsd can copy things from the parent
to the temporary using the parent pointer and do any other calculations
needed; it can then use normal VMSD entries to do the actual data
storage without having to fiddle around with qemu_get_*/qemu_put_*.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170203160651.19917-3-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:14 +00:00
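
To make the flow described above concrete, here is a minimal sketch of the pattern, assuming QEMU's vmstate headers; the DemoState/DemoTmp types and their fields are invented for illustration, and the callback signatures can differ between QEMU versions:

    #include "qemu/osdep.h"
    #include "migration/vmstate.h"

    /* Sketch only: migrate 'ptr' as an offset from a non-migrated 'base'. */
    typedef struct DemoState {
        uint8_t *base;              /* not migrated */
        uint8_t *ptr;               /* what the device actually keeps */
    } DemoState;

    typedef struct DemoTmp {
        DemoState *parent;          /* must be the first field */
        uint64_t offset;            /* what actually goes on the wire */
    } DemoTmp;

    static void demo_tmp_pre_save(void *opaque)
    {
        DemoTmp *tmp = opaque;
        tmp->offset = tmp->parent->ptr - tmp->parent->base;
    }

    static int demo_tmp_post_load(void *opaque, int version_id)
    {
        DemoTmp *tmp = opaque;
        tmp->parent->ptr = tmp->parent->base + tmp->offset;
        return 0;
    }

    static const VMStateDescription vmstate_demo_tmp = {
        .name = "demo/tmp",
        .pre_save = demo_tmp_pre_save,
        .post_load = demo_tmp_post_load,
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(offset, DemoTmp),
            VMSTATE_END_OF_LIST()
        }
    };

    static const VMStateDescription vmstate_demo = {
        .name = "demo",
        .version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_WITH_TMP(DemoState, DemoTmp, vmstate_demo_tmp),
            VMSTATE_END_OF_LIST()
        }
    };

The allocation and freeing of DemoTmp, and the wiring of its parent pointer, are handled by the macro itself, as the message above notes.
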
Dr. David Alan Gilbert
b5b5c56957 migration: Add VMSTATE_UNUSED_VARRAY_UINT32
VMSTATE_UNUSED_VARRAY_UINT32 is used to skip a chunk of the stream
that's an n-element array;  note the array size and the dynamic value
read never get multiplied so there's no overflow risk.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170203160651.19917-2-dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:14 +00:00
zhanghailiang
a8664ba510 COLO: Don't process failover request while loading VM's state
We should not do failover work while the main thread is loading the
VM's state. Otherwise the consistency of the VM's memory and
device state will be broken.

We will restart the loading process after skipping over this stage.
The new failover status 'RELAUNCH' records whether we need to
restart the process.

Cc: Eric Blake <eblake@redhat.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1484657864-21708-4-git-send-email-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Added a missing '(Since 2.9)'
2017-02-13 17:27:13 +00:00
zhanghailiang
c937b9a6db COLO: Shutdown related socket fd while do failover
If the network connection between the primary and secondary hosts breaks
while the COLO/COLO-incoming threads are doing read() or write(),
they will block until the connection times out, and the failover process
will be blocked as a result.

So it is necessary to shut down all the socket fds used by COLO
to avoid this situation. Besides, we should close the corresponding
file descriptors after the failover BH has shut them down,
or there will be an error.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1484657864-21708-3-git-send-email-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
zhanghailiang
479125d53e COLO: fix setting checkpoint-delay not working properly
If we set checkpoint-delay through the 'migrate-set-parameters' command,
it will not take effect until the last checkpoint-delay sleep finishes.
That is especially troublesome when we want to change its value
from an extremely large one to a proper value.

Fix it by using a timer to implement checkpoint-delay.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Message-Id: <1484657864-21708-2-git-send-email-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Halil Pasic
59046ec29a migration: consolidate VMStateField.start
The member VMStateField.start is used for two things: partial data
migration for VBUFFER data (basically providing migration for a
sub-buffer) and locating the next element in a QTAILQ.

The implementation of the VBUFFER feature is broken when VMSTATE_ALLOC
is used. This however goes unnoticed because partial migration
for VBUFFER is not actually used at all.

Let's consolidate the usage of VMStateField.start by removing support
for partial migration for VBUFFER.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>

Message-Id: <20170203175217.45562-1-pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Ashijeet Acharya
0827b9e97d migrate: Introduce zero RAM checks to skip RAM migration
Migration of a "none" machine with no RAM crashes abruptly as
bitmap_new() fails and thus aborts. Instead place zero RAM checks at
appropriate places to skip migration of RAM in this case and complete
migration successfully for devices only.

Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
Message-Id: <1486564125-31366-1-git-send-email-ashijeetacharya@gmail.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Pavel Butsykin
ced1c6166e migration: discard non-dirty ram pages after the start of postcopy
After the start of postcopy migration there are some non-dirty pages which
have already been migrated. These pages are no longer needed on the source
VM, so we can free them, and doing so does not hurt completing the migration.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Message-Id: <20170203152321.19739-4-pbutsykin@virtuozzo.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Pavel Butsykin
53f09a1076 add 'release-ram' migrate capability
This feature frees the migrated memory on the source during postcopy-ram
migration. In the second step of postcopy-ram migration, when the source VM
is paused, we can free the memory that is no longer needed. In particular,
this allows the memory pressure on the source host to start being relaxed
in a load-balancing scenario.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Message-Id: <20170203152321.19739-3-pbutsykin@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Manually merged in Pavel's 'migration: madvise error_report fixup!'
2017-02-13 17:27:13 +00:00
Pavel Butsykin
9eb1476610 migration: add MigrationState arg for ram_save_/compressed_/page()
Cosmetic patch. Using the ms variable instead of migrate_get_current()
looks nicer, especially where it is reused.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Message-Id: <20170203152321.19739-2-pbutsykin@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Amit Shah
cee887d969 MAINTAINERS: update my email address
I'm leaving my job at Red Hat, so this email address will stop working next
week. Update it to one that I will have access to later.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1486120433-11628-1-git-send-email-amit.shah@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Amit Shah
c77a6c8dd7 migration: remove myself as maintainer
I'm switching jobs, and I'm not sure I can continue maintaining migration.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1486120416-11566-1-git-send-email-amit.shah@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Peter Maydell
305e6c8a2f Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Mon 13 Feb 2017 16:29:26 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  Makefile: Make "install" depend on "trace-events-all"
  docs: update manpage for stderr->log rename

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 16:44:04 +00:00
Fam Zheng
6eab3544f4 Makefile: Make "install" depend on "trace-events-all"
We install this file to the data dir, but since 0ab8ed18 it's no longer
required by any objects during "make". List it explicitly as a dependency
of the install target and fix the broken "make install" command.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170204143245.15974-1-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-13 13:38:31 +00:00
Philipp Gesang
20f8a1392f docs: update manpage for stderr->log rename
With commit ed7f5f1d8d the name of
this backend changed from “stderr” to “log”.

Signed-off-by: Philipp Gesang <philipp.gesang@intra2net.com>
Message-id: 20170202114101.2655-1-philipp.gesang@intra2net.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-13 13:38:31 +00:00
Peter Maydell
df96bfab49 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20170213-1' into staging
vga: bugfixes for cirrus and virtio-gpu

# gpg: Signature made Mon 13 Feb 2017 08:14:47 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vga-20170213-1:
  Revert "cirrus: allow zero source pitch in pattern fill rops"
  cirrus: fix patterncopy checks
  cirrus: replace debug printf with trace points
  vga: replace debug printf with trace points
  virtio-gpu: fix resource leak in virgl_cmd_resource_unref
  virtio-gpu: fix memory leak in set scanout

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 10:54:49 +00:00
Peter Maydell
0b4384d0bb Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2017-02-12' into staging
Block patches

# gpg: Signature made Sun 12 Feb 2017 01:26:20 GMT
# gpg:                using RSA key 0xF407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2017-02-12: (21 commits)
  qemu-img: Avoid setting ret to unused value in img_convert()
  qemu-img: Use qemu_strtoul() rather than raw strtoul()
  qemu-io: don't allow I/O operations larger than BDRV_REQUEST_MAX_BYTES
  qcow2: Optimize the refcount-block overlap check
  qemu-io: Add failure regression tests
  qemu-iotests: Add _unsupported_fmt helper
  qemu-io: Return non-zero exit code on failure
  block/nfs: fix naming of runtime opts
  block/nfs: fix NULL pointer dereference in URI parsing
  block: bdrv_invalidate_cache: invalidate children first
  block/qapi: reduce the execution time of qmp_query_blockstats
  block/qapi: reduce the coupling between the bdrv_query_stats and bdrv_query_bds_stats
  qemu-iotest: test to lookup protocol-based image with relative backing
  qemu-iotests: Don't create fifos / pidfiles with protocol paths
  block: check full backing filename when searching protocol filenames
  block/vmdk: Fix the endian problem of buf_len and lba
  iotests: record separate timings per format,protocol pair
  iotests: Fix reference output for 059
  qapi: Tweak error message of bdrv_query_image_info
  qemu-img: Improve commit invalid base message
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 10:16:23 +00:00
Peter Maydell
ed3d90df7c Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170210.0' into staging
VFIO updates 2017-02-10

 - Fix GTT wrap-around for Skylake IGD assignment (Alex Williamson)
 - Tag vfio-pci-igd-lpc-bridge as bridge device category (Thomas Huth)
 - Don't build calxeda-xgmac or amd-xgbe except on ARM (Thomas Huth)

# gpg: Signature made Fri 10 Feb 2017 21:34:33 GMT
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-updates-20170210.0:
  hw/vfio: Add CONFIG switches for calxeda-xgmac and amd-xgbe
  hw/vfio/pci-quirks: Set category of the "vfio-pci-igd-lpc-bridge" device
  vfio-pci: Fix GTT wrap-around for Skylake+ IGD

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 09:30:15 +00:00
Peter Maydell
10d6eda192 qemu-img: Avoid setting ret to unused value in img_convert()
Coverity points out that we assign the return value from
bdrv_snapshot_load_tmp() to 'ret' in img_convert(), but then
never use that variable. (We check for failure by looking
at local_err instead.) Drop the unused assignment, bringing
the call into line with the following call to
bdrv_snapshot_load_tmp_by_id_or_name().

(Fixes CID 1247240.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486744104-15590-3-git-send-email-peter.maydell@linaro.org
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:56:32 +01:00
Peter Maydell
8b3c679228 qemu-img: Use qemu_strtoul() rather than raw strtoul()
Some of the argument parsing in qemu-img uses strtoul() to parse
integer arguments.  This is tricky to get correct and in fact the
code does not get it right, because it assigns the result of
strtoul() to an 'int' variable and then tries to check for > INT_MAX.
Coverity correctly complains that the comparison is always false.

Rewrite to use qemu_strtoul(), which has a saner convention for
reporting conversion failures.

(Fixes CID 1356421, CID 1356422, CID 1356423.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486744104-15590-2-git-send-email-peter.maydell@linaro.org
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:53:31 +01:00
Alberto Garcia
3026c4688c qemu-io: don't allow I/O operations larger than BDRV_REQUEST_MAX_BYTES
Passing a request size larger than BDRV_REQUEST_MAX_BYTES to any of the
I/O commands results in an error. While 'read' and 'write' handle the
error correctly, 'aio_read' and 'aio_write' hit an assertion:

blk_aio_read_entry: Assertion `rwco->qiov->size == acb->bytes' failed.

The reason is that the QEMU I/O code cannot handle request sizes
larger than BDRV_REQUEST_MAX_BYTES, so this patch makes qemu-io check
that all values are within range.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 79f66648c685929a144396bda24d13a207131dcf.1485878688.git.berto@igalia.com
[mreitz: Use BDRV_REQUEST_MAX_BYTES instead of INT_MAX]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:43 +01:00
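
The guard itself is simple; a hedged sketch of the idea, with a placeholder limit and invented names (not the actual qemu-io code):

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder standing in for BDRV_REQUEST_MAX_BYTES. */
    #define REQUEST_MAX_BYTES_SKETCH ((int64_t)1 << 30)

    /* Reject out-of-range sizes up front instead of asserting later. */
    static bool request_size_ok(int64_t bytes)
    {
        if (bytes < 0 || bytes > REQUEST_MAX_BYTES_SKETCH) {
            fprintf(stderr, "length cannot exceed %" PRId64 ", given %" PRId64 "\n",
                    REQUEST_MAX_BYTES_SKETCH, bytes);
            return false;
        }
        return true;
    }
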
Alberto Garcia
7061a07898 qcow2: Optimize the refcount-block overlap check
The metadata overlap checks introduced in a40f1c2add help detect
corruption in the qcow2 image by verifying that data writes don't
overlap with existing metadata sections.

The 'refcount-block' check in particular iterates over the refcount
table in order to get the addresses of all refcount blocks and check
that none of them overlap with the region where we want to write.

The problem with the refcount table is that since it always occupies
complete clusters its size is usually very big. With the default
values of cluster_size=64KB and refcount_bits=16 this table holds 8192
entries, each one of them enough to map 2GB worth of host clusters.

So unless we're using images with several TB of allocated data this
table is going to be mostly empty, and iterating over it is a waste of
CPU. If the storage backend is fast enough this can have an effect on
I/O performance.

This patch keeps the index of the last used (i.e. non-zero) entry in
the refcount table and updates it every time the table changes. The
refcount-block overlap check then uses that index instead of reading
the whole table.

In my tests with a 4GB qcow2 file stored in RAM this doubles the
amount of write IOPS.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170201123828.4815-1-berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:43 +01:00
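
As a rough sketch of the idea, with invented names and a simplified table layout (the real qcow2 code differs), the overlap check only needs to walk entries up to the highest one known to be in use:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct RefTableSketch {
        uint64_t *entries;        /* refcount block offsets, 0 == unused */
        uint32_t size;            /* total number of entries */
        uint32_t max_used_index;  /* highest non-zero entry, kept up to date */
    } RefTableSketch;

    /* Call whenever a table entry is set or cleared. */
    static void ref_table_update_max(RefTableSketch *t)
    {
        t->max_used_index = 0;
        for (uint32_t i = 0; i < t->size; i++) {
            if (t->entries[i]) {
                t->max_used_index = i;
            }
        }
    }

    /* Scan only up to max_used_index instead of the whole, mostly empty table. */
    static bool overlaps_refcount_block(const RefTableSketch *t, uint64_t cluster_size,
                                        uint64_t offset, uint64_t bytes)
    {
        for (uint32_t i = 0; i <= t->max_used_index; i++) {
            uint64_t block = t->entries[i];
            if (block && offset < block + cluster_size && block < offset + bytes) {
                return true;
            }
        }
        return false;
    }
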
Nir Soffer
bf68bcb18e qemu-io: Add failure regression tests
Add regression tests checking that qemu-io fails with a non-zero exit code
when reading a non-existing file or using the wrong image format.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Message-id: 20170201003120.23378-4-nirsof@gmail.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Nir Soffer
b4a2caa4bd qemu-iotests: Add _unsupported_fmt helper
This helper allows adding tests that support any format except the
specified formats. This may be useful to test that many formats behave
in a common way.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Message-id: 20170201003120.23378-3-nirsof@gmail.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Nir Soffer
b7aa131519 qemu-io: Return non-zero exit code on failure
The result of openfile was not checked, leading to a failure deep in the
actual command with a confusing error message, and exiting with exit code 0.

Here is a simple example - trying to read with the wrong format:

    $ touch file
    $ qemu-io -f qcow2 -c 'read -P 1 0 1024' file; echo $?
    can't open device file: Image is not in qcow2 format
    no file open, try 'help open'
    0

With this patch, we fail earlier with exit code 1:

    $ ./qemu-io -f qcow2 -c 'read -P 1 0 1024' file; echo $?
    can't open device file: Image is not in qcow2 format
    1

Failing earlier, we don't log this error now:

    no file open, try 'help open'

But some tests expected it; the line was removed from the test output.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170201003120.23378-2-nirsof@gmail.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Peter Lieven
f67409a5bb block/nfs: fix naming of runtime opts
commit 94d6a7a accidentally left the naming of the runtime opts and the
QAPI schema inconsistent. As one consequence, passing parameters in the
URI is broken. Sync the naming of the runtime opts with the QAPI
schema.

Please note that this is technically backwards incompatible with the 2.8
release, but the 2.8 release is the only version that had the wrong naming.
Furthermore release 2.8 suffered from a NULL pointer dereference during
URI parsing.

Fixes: 94d6a7a76e
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-id: 1485942829-10756-3-git-send-email-pl@kamp.de
[mreitz: Fixed commit message]
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Peter Lieven
8d20abe87a block/nfs: fix NULL pointer dereference in URI parsing
parse_uint_full wants to put the parsed value into the
variable passed via its second argument which is NULL.

Fixes: 94d6a7a76e
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1485942829-10756-2-git-send-email-pl@kamp.de
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Vladimir Sementsov-Ogievskiy
16e977d506 block: bdrv_invalidate_cache: invalidate children first
The current implementation invalidates the parent bds first and then its
children. This leads to the following bug:

after incoming migration, in bdrv_invalidate_cache_all:
1. invalidate parent bds - reopen it with BDRV_O_INACTIVE cleared
2. child is not yet invalidated
3. parent check that its BDRV_O_INACTIVE is cleared
4. parent writes to child
5. assert in bdrv_co_pwritev, as BDRV_O_INACTIVE is set for child

This patch fixes it by simply changing the invalidation sequence:
invalidate the children first.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170131112308.54189-1-vsementsov@virtuozzo.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
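
A simplified sketch of the reordering (the node structure here is invented; it is not the actual bdrv_invalidate_cache code):

    #include <stdbool.h>

    typedef struct NodeSketch NodeSketch;
    struct NodeSketch {
        bool inactive;              /* stands in for BDRV_O_INACTIVE */
        NodeSketch *children[4];
        int nb_children;
    };

    static void invalidate_cache_sketch(NodeSketch *node)
    {
        /* Recurse into the children first ... */
        for (int i = 0; i < node->nb_children; i++) {
            invalidate_cache_sketch(node->children[i]);
        }
        /* ... so that by the time the parent becomes writable again,
         * none of its children is still marked inactive. */
        node->inactive = false;
    }
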
Dou Liyang
a6baa60807 block/qapi: reduce the execution time of qmp_query_blockstats
In order to reduce the execution time, this patch optimizes
qmp_query_blockstats():
Remove the next_query_bds function.
Remove the bdrv_query_stats function.
Remove some redundant checks.

The original qmp_query_blockstats calls next_query_bds to get
the next objects in each loop. In next_query_bds, it checks
query_nodes and blk. It also calls bdrv_query_stats to get
the stats, and in bdrv_query_stats it checks blk and bs each
time. This wastes time and may stall the main loop a
bit. And if there are many disks that do not use the dataplane
feature, this may affect the performance of the main loop thread.

This patch removes those two functions and makes the structure
clearer.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Message-id: 1484467275-27919-3-git-send-email-douly.fnst@cn.fujitsu.com
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[mreitz: Removed duplicate info->value assignment]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Dou Liyang
20a6d768f5 block/qapi: reduce the coupling between the bdrv_query_stats and bdrv_query_bds_stats
The bdrv_query_stats and bdrv_query_bds_stats functions need to call
each other, which increases the coupling. It also makes the code
complicated and introduces some unnecessary checks.

Remove the call from bdrv_query_bds_stats to bdrv_query_stats and use
recursion instead to make it clearer.

Avoid testing whether blk is NULL while querying the bds stats.
It is unnecessary.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Message-id: 1484467275-27919-2-git-send-email-douly.fnst@cn.fujitsu.com
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Jeff Cody
256e3b6387 qemu-iotest: test to lookup protocol-based image with relative backing
This test uses NFS and block-stream to force a lookup of a backing
image that has a relative filename, but a full backing image name
with the protocol path intact.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: 1a7a3d6e6d8af36cd5b47ed6ea93b5a9ededf81b.1485392617.git.jcody@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Jeff Cody
846a1d118e qemu-iotests: Don't create fifos / pidfiles with protocol paths
Trying to create, use, and remove fifos and pidfiles on protocol paths
(e.g. nfs://localhost/scratch/qemu-nbd.pid) is obviously broken.

Use the local $TEST_DIR path before it is 'protocolized' for these
files.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: bb4a731a35bc4ac81fe3db17479dd686315317c7.1485392617.git.jcody@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Jeff Cody
418661e032 block: check full backing filename when searching protocol filenames
In bdrv_find_backing_image(), if we are searching an image for a backing
file that contains a protocol, we currently only compare unmodified
paths.

However, some management software will change the backing filename to be
a relative filename in a path.  QEMU is able to handle this fine,
because internally it will use path_combine to put together the full
protocol URI.

However, this can lead to an inability to match an image during a QAPI
command that needs to use bdrv_find_backing_image() to find the image,
when it is searched by the full URI.

When searching for a protocol filename, if the straight comparison
fails, this patch will also compare against the full backing filename to
see if that is a match.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: c2d025adca8a2b665189e6f4cf080f44126d0b6b.1485392617.git.jcody@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
QingFeng Hao
4545d4f4af block/vmdk: Fix the endian problem of buf_len and lba
The problem was triggered by qemu-iotests case 055. It failed when it
was comparing the compressed vmdk image with the original test.img.

The cause is that buf_len in vmdk_write_extent wasn't converted to
little-endian before it was stored to disk. But later vmdk_read_extent
reads it and converts it from little-endian to CPU endianness.
If the CPU is big-endian, like s390, the problem occurs and
the data length read by vmdk_read_extent becomes invalid.
The fix is to add the conversion in vmdk_write_extent and, at the same
time, fix the endianness problem of the lba field, which must also be
converted to little-endian before being stored to disk.

Cc: qemu-stable@nongnu.org
Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Signed-off-by: Jing Liu <liujbjl@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20161216052040.53067-2-haoqf@linux.vnet.ibm.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
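
A sketch of the write-side conversion described above, assuming QEMU's bswap helpers; the marker layout shown is illustrative, not the exact vmdk structure:

    #include "qemu/osdep.h"
    #include "qemu/bswap.h"

    /* Illustrative compressed-grain marker; fields are little-endian on disk. */
    typedef struct GrainMarkerSketch {
        uint64_t lba;
        uint32_t size;              /* compressed data length (buf_len) */
    } QEMU_PACKED GrainMarkerSketch;

    static void marker_to_disk(GrainMarkerSketch *m, uint64_t lba, uint32_t buf_len)
    {
        /* Mirror the little-endian -> CPU conversion done on the read side. */
        m->lba  = cpu_to_le64(lba);
        m->size = cpu_to_le32(buf_len);
    }
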
Daniel P. Berrange
36bd422812 iotests: record separate timings per format,protocol pair
The 'check' program records timings for each test that
is run. These timings are only valid, however, for a
particular format/protocol combination. So if 'check' is run frequently
with a variety of different formats or protocols, the times printed
can be very misleading.

Instead of having a single 'check.time' file, maintain
multiple 'check.time-$IMGPROTO-$IMGFMT' files.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170103160556.9895-1-berrange@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Fam Zheng
53b63460f6 iotests: Fix reference output for 059
It was broken by efaa7c4eeb when it dropped the device name "image"
from the BB API.  Now this error message text has been updated again, so sync it up.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170119130759.28319-3-famz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
Fam Zheng
9adceb0213 qapi: Tweak error message of bdrv_query_image_info
@bs doesn't always have a device name, such as when it comes from
"qemu-img info". Report the file name instead.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170119130759.28319-2-famz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
Max Reitz
6b33f3ae8b qemu-img: Improve commit invalid base message
When trying to invoke qemu-img commit with a base image file name that
is not part of the top image's backing chain, the user receives a rather
plain "Base not found" error message. This is not really helpful because
it does not explain what "not found" means, potentially leaving the user
wondering why qemu cannot find a file despite it clearly existing in the
file system.

Improve the error message by clarifying that "not found" means "not
found in the top image's backing chain".

Reported-by: Ala Hino <ahino@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20161201020508.24417-1-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
QingFeng Hao
b135233b0d iotests: Fix a problem in common.filter
If TEST_DIR is set to /tmp, test case 144 will fail. The reason is that
TEST_DIR resembles 144's test image name tmp.qcow2.
When 144 is testing $TEST_DIR/tmp.qcow2, it wants to replace
$TEST_DIR/tmp.qcow2 with TEST_DIR/tmp.qcow2, but actually it will fail
and get TEST_DIRTEST_DIR.qcow2 in this case.
The fix is just to modify the code to replace $TEST_DIR/ with TEST_DIR/.

Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Message-id: 20161216054723.96055-2-haoqf@linux.vnet.ibm.com
Reviewed-by: Eric Blake <eblake@redhat.com>
[mreitz: Fixed commit message and dropped superfluous escaping]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
Thomas Huth
e197de50c6 hw/vfio: Add CONFIG switches for calxeda-xgmac and amd-xgbe
Both devices seem to be specific to the ARM platform. It's confusing
for the users if they show up on other target architectures, too
(e.g. when the user runs QEMU with "-device ?" to get a list of
supported devices). Thus let's introduce proper configuration switches
so that the devices are only compiled and included when they are
really required.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-10 13:12:03 -07:00
Thomas Huth
f23363ea44 hw/vfio/pci-quirks: Set category of the "vfio-pci-igd-lpc-bridge" device
The device has "bridge" in its name, so it should obviously be in
the category DEVICE_CATEGORY_BRIDGE.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-10 13:12:03 -07:00
Alex Williamson
ac2a9862b7 vfio-pci: Fix GTT wrap-around for Skylake+ IGD
Previous IGD versions, up through Broadwell, only seem to write GTT values
into the first 1MB of space allocated for the BDSM, but clearly the GTT
can be multiple MB in size.  Our test in vfio_igd_quirk_data_write()
correctly filters out indexes beyond 1MB, but given the 1MB mask we're
using, we re-apply writes only to the first 1MB of the guest allocated
BDSM.

We can't assume either the host or guest BDSM is naturally aligned, so
we can't simply apply a different mask.  Instead, save the host BDSM
and do the arithmetic to subtract the host value to get the BDSM
offset and add it to the guest allocated BDSM.

Reported-by: Alexander Indenbaum <alexander.indenbaum@gmail.com>
Tested-by: Alexander Indenbaum <alexander.indenbaum@gmail.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-10 13:12:03 -07:00
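
The arithmetic boils down to re-basing the written value; a sketch with invented names (not the actual vfio quirk code):

    #include <stdint.h>

    /* Rebase a GTT entry from the host-allocated BDSM to the guest's BDSM.
     * Neither base is necessarily aligned, so compute an offset rather than
     * applying a mask. */
    static uint64_t rebase_gtt_entry(uint64_t data, uint64_t host_bdsm,
                                     uint64_t guest_bdsm)
    {
        uint64_t offset = data - host_bdsm;
        return guest_bdsm + offset;
    }
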
Peter Maydell
6311b19b5c Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170210' into staging
target-arm queue:
 * aspeed: minor fixes
 * virt: declare fwcfg and virtio-mmio as DMA coherent in DT & ACPI
 * arm: enable basic TCG emulation of PMU for AArch64

# gpg: Signature made Fri 10 Feb 2017 18:06:30 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170210:
  aspeed/smc: use a modulo to check segment limits
  aspeed/smc: handle dummies only in fast read mode
  aspeed: remove useless comment on controller segment size
  aspeed: check for negative values returned by blk_getlength()
  hw/arm/virt: Declare fwcfg as dma cache coherent in dt
  hw/arm/virt: Declare fwcfg as dma cache coherent in ACPI
  hw/arm/virt: Declare virtio-mmio as dma cache coherent in ACPI
  target-arm: Declare virtio-mmio as dma-coherent in dt
  target-arm: Enable vPMU support under TCG mode
  target-arm: Add support for PMU register PMINTENSET_EL1
  target-arm: Add support for AArch64 PMU register PMXEVTYPER_EL0
  target-arm: Add support for PMU register PMSELR_EL0

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 18:54:30 +00:00
Peter Maydell
98b2faeaee Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Fri 10 Feb 2017 16:47:54 GMT
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  ahci: advertise HOST_CAP_64

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 18:07:02 +00:00
Cédric Le Goater
b4cc583f02 aspeed/smc: use a modulo to check segment limits
The size of a segment is not necessarily a power of 2.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486648058-520-5-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:30 +00:00
Cédric Le Goater
1a6d4fc27d aspeed/smc: handle dummies only in fast read mode
The HW works fine in normal read mode with dummy bytes being set. So let's
check for this case and not transfer the dummy bytes.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1486648058-520-4-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Cédric Le Goater
93bf276d5f aspeed: remove useless comment on controller segment size
The flash devices used for the FMC controller (BMC firmware) are well
defined for each Aspeed machine and are all smaller than the default
mapping window size, at least for CE0 which is the chip the SoC boots
from.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486648058-520-3-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Cédric Le Goater
0c7209bee8 aspeed: check for negative values returned by blk_getlength()
write_boot_rom() does not check for negative values. This is more a
problem for Coverity than for the actual code, as the size of the flash
device is checked when the m25p80 object is created. If there is
anything wrong with the backing file, we should not even reach that
path.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1486648058-520-2-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Alexander Graf
14efdb5cb3 hw/arm/virt: Declare fwcfg as dma cache coherent in dt
Fw-cfg recently learned how to directly access guest memory and does so in
cache coherent fashion. Tell the guest about that fact when it's using DT.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1486644810-33181-5-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
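
In device-tree terms this is a single empty property on the fw-cfg node; a sketch using QEMU's fdt helper, where the wrapper function name is an invented illustration:

    #include "qemu/osdep.h"
    #include "sysemu/device_tree.h"

    /* Mark a DT node's DMA as coherent with the CPU caches. */
    static void mark_node_dma_coherent(void *fdt, const char *nodename)
    {
        /* "dma-coherent" is an empty (boolean) property. */
        qemu_fdt_setprop(fdt, nodename, "dma-coherent", NULL, 0);
    }
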
Alexander Graf
3b5c492b1c hw/arm/virt: Declare fwcfg as dma cache coherent in ACPI
Fw-cfg recently learned how to directly access guest memory and does so in
cache coherent fashion. Tell the guest about that fact when it's using ACPI.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1486644810-33181-4-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Alexander Graf
76266d9913 hw/arm/virt: Declare virtio-mmio as dma cache coherent in ACPI
Virtio-mmio devices can directly access guest memory and do so in cache
coherent fashion. Tell the guest about that fact when it's using ACPI.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1486644810-33181-3-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Alexander Graf
054bb7b215 target-arm: Declare virtio-mmio as dma-coherent in dt
QEMU emulated hardware is always dma coherent with its guest. We do
annotate that correctly on the PCI host controller, but left out
virtio-mmio.

Recent kernels have started to interpret that flag rather than take
dma coherency as granted with virtio-mmio. While that is considered
a kernel bug, as it breaks previously working systems, it showed that
our dt description is incomplete.

This patch adds the respective marker that allows guest OSs to evaluate
that our virtio-mmio devices are indeed cache coherent.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Message-id: 1486644810-33181-2-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Wei Huang
d6f02ce3b8 target-arm: Enable vPMU support under TCG mode
This patch contains several fixes to enable vPMU under TCG mode. It
first removes the check of kvm_enabled() while unsetting
ARM_FEATURE_PMU. With it, the .pmu option can be used to turn the vPMU on/off
under TCG mode. Secondly, the PMU node of the DT table is now created under TCG.
The last fix is to disable the masking of the PMUver field of ID_AA64DFR0_EL1.

Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486504171-26807-5-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Wei Huang
e6ec54571e target-arm: Add support for PMU register PMINTENSET_EL1
This patch adds access support for PMINTENSET_EL1.

Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486504171-26807-4-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Wei Huang
fdb8665672 target-arm: Add support for AArch64 PMU register PMXEVTYPER_EL0
In order to support Linux perf, which uses PMXEVTYPER register,
this patch adds read/write access support for PMXEVTYPER. The access
is CONSTRAINED UNPREDICTABLE when PMSELR is not 0x1f. Additionally
this patch adds support for PMXEVTYPER_EL0.

Signed-off-by: Wei Huang <wei@redhat.com>
Message-id: 1486504171-26807-3-git-send-email-wei@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Wei Huang
6b0407805d target-arm: Add support for PMU register PMSELR_EL0
This patch adds support for AArch64 register PMSELR_EL0. The existing
PMSELR definition is revised accordingly.

Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: Moved #ifndef CONFIG_USER_ONLY to cover new regdefs]
Message-id: 1486504171-26807-2-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Ladi Prosek
98cb5dccb1 ahci: advertise HOST_CAP_64
The AHCI emulation code supports 64-bit addressing and should advertise this
fact in the Host Capabilities register. Both Linux and Windows drivers test
this bit to decide if the upper 32 bits of various registers may be written
to, and at least some versions of Windows have a bug where DMA is attempted
with an address above 4GB but, in the absence of HOST_CAP_64, the upper 32
bits are left uninitialized, which leads to memory corruption.

[Maintainer edit:

This fixes https://bugzilla.redhat.com/show_bug.cgi?id=1411105,
which affects Windows Server 2008 SP2 in some cases.]

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Message-id: 1484305370-6220-1-git-send-email-lprosek@redhat.com
[Amended commit message --js]
Signed-off-by: John Snow <jsnow@redhat.com>
2017-02-10 11:47:11 -05:00
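
The fix amounts to setting one bit in the HBA capabilities register; a sketch with simplified names (the bit position follows the AHCI spec's S64A field, and the register plumbing is omitted):

    #include <stdint.h>

    /* CAP.S64A, "Supports 64-bit Addressing", is bit 31 of the HBA CAP register. */
    #define HOST_CAP_64_SKETCH (1u << 31)

    static uint32_t ahci_caps_with_64bit(uint32_t caps)
    {
        return caps | HOST_CAP_64_SKETCH;
    }
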
Gerd Hoffmann
12e97ec399 Revert "cirrus: allow zero source pitch in pattern fill rops"
This reverts commit 5858dd1801.

Conflicts:
	hw/display/cirrus_vga.c

Cc: Wolfgang Bumiller <w.bumiller@proxmox.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 1486645341-5010-2-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
95280c31cd cirrus: fix patterncopy checks
The blit_region_is_unsafe checks don't work correctly for the
patterncopy source.  It's a fixed-size region, which doesn't
depend on cirrus_blt_{width,height}.  So do the check in
cirrus_bitblt_common_patterncopy instead, then tell blit_is_unsafe that
it doesn't need to verify the source.  Also correctly handle the case
where we blit from cirrus_bitbuf.

This patch replaces 5858dd1801.

Security impact:  I think we mostly err on the safe side this
time, refusing blits which should have been allowed.

Only exception is placing the blit source at the end of the video ram,
so cirrus_blt_srcaddr + 256 goes beyond the end of video memory.  But
even in that case I'm not fully sure this actually allows read access to
host memory.  To trick the commit 5858dd18 security checks one has to
pick very small cirrus_blt_{width,height} values, which in turn implies
only a fraction of the blit source will actually be used.

Cc: Wolfgang Bumiller <w.bumiller@proxmox.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 1486645341-5010-1-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
ec87f206d7 cirrus: replace debug printf with trace points
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486561893-26470-2-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
cf7dabeebc vga: replace debug printf with trace points
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486561893-26470-1-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
5e8e3c4c75 virtio-gpu: fix resource leak in virgl_cmd_resource_unref
When the guest sends VIRTIO_GPU_CMD_RESOURCE_UNREF without detaching the
backing storage beforehand (VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING)
we'll leak memory.

This patch fixes it for 3d mode, similar to the 2d mode fix in commit
"b8e2392 virtio-gpu: call cleanup mapping function in resource destroy".

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485167210-4757-1-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Li Qiang
dd248ed7e2 virtio-gpu: fix memory leak in set scanout
In the virtio_gpu_set_scanout function, when the 'rect' is created
its refcount is set to 2, by pixman_image_create_bits and the
qemu_create_displaysurface_pixman function. This can lead to
a memory leak. This patch avoids that issue.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 5884626f.5b2f6b0a.1bfff.3037@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-10 16:49:45 +01:00
Thomas Huth
61eedf7aec tests/prom-env: Ease time-out problems on slow hosts
Peter Maydell recently ran into time-out problems with the
prom-env test on a rather slow ARM board. To tackle this issue,
we can speed up the test by running QEMU with "-nodefaults" for
the pseries machine, so that SLOF has less devices to scan during
boot, and by using the "nvramrc" environment variable instead of
"boot-command", since this variable is evaluated earlier in the
boot process.
And to be really sure that we do not face such time-out problems
again, let's also increase the time-out value from 100s to 120s.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1486739699-1076-1-git-send-email-thuth@redhat.com
Tested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 15:44:53 +00:00
Peter Maydell
33d076ebd0 Merge remote-tracking branch 'remotes/stsquad/tags/pull-travis-10022017-1' into staging
One minor fix and a build split to reduce timeouts.

# gpg: Signature made Fri 10 Feb 2017 14:46:52 GMT
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-travis-10022017-1:
  .travis.yml: split VM based builds
  .travis.yml: don't specify CONFIG twice

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 15:05:37 +00:00
Alex Bennée
78a22af040 .travis.yml: split VM based builds
The Trusty-based builds run a little slower than the main container-based
ones. This is also true for the latest version of Clang. The
builds are getting very close to (and occasionally run over) the 50 minute
timeout. Rather than partitioning by target, I just split them into
linux-user and system builds.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-10 13:19:56 +00:00
Alex Bennée
fed5364971 .travis.yml: don't specify CONFIG twice
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-10 13:19:56 +00:00
Peter Maydell
8b1897725d Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170209-2' into staging
vnc: add support for multiple listening sockets.
vnc: misc fixes and cleanups.

# gpg: Signature made Thu 09 Feb 2017 16:45:02 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170209-2:
  ui: add ability to specify multiple VNC listen addresses
  util: add iterators for QemuOpts values
  ui: let VNC server listen on all resolved IP addresses
  ui: extract code to connect/listen from vnc_display_open
  ui: refactor code for populating SocketAddress from vnc_display_open
  ui: refactor VncDisplay to allow multiple listening sockets
  ui: fix reporting of VNC auth in query-vnc-servers
  ui: fix regression handling bare 'websocket' option to -vnc
  vnc: do not disconnect on EAGAIN
  ui/vnc: Drop unused vnc_has_job() and vnc_jobs_clear()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-09 16:58:39 +00:00
Peter Maydell
f073cd3a2b Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170207-1' into staging
target-arm:
 * new "unimplemented" device for stubbing out devices in a
   system model so accesses can be logged
 * stellaris: document the SoC memory map
 * arm: create instruction syndromes for AArch32 data aborts
 * arm: Correctly handle watchpoints for BE32 CPUs
 * Fix Thumb-1 BE32 execution and disassembly
 * arm: Add cfgend parameter for ARM CPU selection
 * sd: sdhci: check data length during dma_memory_read
 * aspeed: add a watchdog controller
 * integratorcp: adding vmstate for save/restore

# gpg: Signature made Tue 07 Feb 2017 19:20:19 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170207-1:
  stellaris: Use the 'unimplemented' device for parts we don't implement
  hw/misc: New "unimplemented" sysbus device
  stellaris: Document memory map and which SoC devices are unimplemented
  target/arm: A32, T32: Create Instruction Syndromes for Data Aborts
  target/arm: Abstract out pbit/wbit tests in ARM ldr/str decode
  arm: Correctly handle watchpoints for BE32 CPUs
  Fix Thumb-1 BE32 execution and disassembly.
  target/arm: Add cfgend parameter for ARM CPU selection.
  hw/arm/integratorcp: Support specifying features via -cpu
  sd: sdhci: check data length during dma_memory_read
  aspeed: add a watchdog controller
  wdt: Add Aspeed watchdog device model
  integratorcp: adding vmstate for save/restore

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 19:21:30 +00:00
Peter Maydell
aecfbbc97a stellaris: Use the 'unimplemented' device for parts we don't implement
Use the 'unimplemented' dummy device to cover regions of the
SoC device memory map which we don't have proper device
implementations for yet.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1484247815-15279-4-git-send-email-peter.maydell@linaro.org
2017-02-07 18:55:15 +00:00
Peter Maydell
f5095aa380 hw/misc: New "unimplemented" sysbus device
Create a new "unimplemented" sysbus device, which simply accepts
all read and write accesses, and implements them as read-as-zero,
write-ignored, with logging of the access as LOG_UNIMP.

This is useful for stubbing out bits of an SoC or board model
which haven't been written yet.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1484247815-15279-3-git-send-email-peter.maydell@linaro.org
2017-02-07 18:55:15 +00:00
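
A simplified sketch of read-as-zero / write-ignored MemoryRegionOps with LOG_UNIMP logging, assuming QEMU's memory API headers; the real device additionally wraps this in a sysbus device with a configurable name and size:

    #include "qemu/osdep.h"
    #include "qemu/log.h"
    #include "exec/memory.h"

    static uint64_t unimp_read_sketch(void *opaque, hwaddr addr, unsigned size)
    {
        qemu_log_mask(LOG_UNIMP, "unimplemented device read "
                      "(addr 0x%" HWADDR_PRIx ", size %u)\n", addr, size);
        return 0;                   /* read as zero */
    }

    static void unimp_write_sketch(void *opaque, hwaddr addr,
                                   uint64_t value, unsigned size)
    {
        qemu_log_mask(LOG_UNIMP, "unimplemented device write "
                      "(addr 0x%" HWADDR_PRIx ", value 0x%" PRIx64
                      ", size %u)\n", addr, value, size);
        /* write ignored */
    }

    static const MemoryRegionOps unimp_ops_sketch = {
        .read = unimp_read_sketch,
        .write = unimp_write_sketch,
        .endianness = DEVICE_NATIVE_ENDIAN,
    };
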
Peter Maydell
394c8bbfb7 stellaris: Document memory map and which SoC devices are unimplemented
Add a comment documenting the memory map of the SoC devices and which
are not implemented.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1484247815-15279-2-git-send-email-peter.maydell@linaro.org
2017-02-07 18:55:15 +00:00
Peter Maydell
9bb6558a21 target/arm: A32, T32: Create Instruction Syndromes for Data Aborts
Add support for generating the ISS (Instruction Specific Syndrome)
for Data Abort exceptions taken from AArch32. These syndromes are
used by hypervisors for example to trap and emulate memory accesses.

This is the equivalent for AArch32 guests of the work done for AArch64
guests in commit aaa1f954d4.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-02-07 18:30:00 +00:00
Peter Maydell
63f26fcfda target/arm: Abstract out pbit/wbit tests in ARM ldr/str decode
In the ARM ldr/str decode path, rather than directly testing
"insn & (1 << 21)" and "insn & (1 << 24)", abstract these
bits out into wbit and pbit local flags. (We will want to
do more tests against them to determine whether we need to
provide syndrome information.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-02-07 18:29:59 +00:00
Julian Brown
4061200059 arm: Correctly handle watchpoints for BE32 CPUs
In BE32 mode, sub-word size watchpoints can fail to trigger because the
address of the access is adjusted in the opcode helpers before being
compared with the watchpoint registers.  This patch reverses the address
adjustment before performing the comparison with the help of a new CPUClass
hook.

This version of the patch augments and tidies up comments a little.

Signed-off-by: Julian Brown <julian@codesourcery.com>
Message-id: caaf64ffc72f6ae183015337b7afdbd4b8989cb6.1484929304.git.julian@codesourcery.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Julian Brown
f7478a92dd Fix Thumb-1 BE32 execution and disassembly.
Thumb-1 code has some issues in BE32 mode (as currently implemented). In
short, since bytes are swapped within words at load time for BE32
executables, this also swaps pairs of adjacent Thumb-1 instructions.

This patch un-swaps those pairs of instructions again, both for execution,
and for disassembly. (The previous version of the patch always read four
bytes in arm_read_memory_func and then extracted the proper two bytes,
in a probably misguided attempt to match the behaviour of actual hardware
as described by e.g. the ARM9TDMI TRM, section 3.3 "Endian effects for
instruction fetches". It's less complicated to just read the correct
two bytes though.)

Signed-off-by: Julian Brown <julian@codesourcery.com>
Message-id: ca20462a044848000370318a8bd41dd0a4ed273f.1484929304.git.julian@codesourcery.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Julian Brown
3a062d5730 target/arm: Add cfgend parameter for ARM CPU selection.
Add a new "cfgend" property which selects whether the CPU resets into
big-endian mode or not.  This setting affects whether we reset with
SCTLR_B (ARMv6 and earlier) or SCTLR_EE (ARMv7 and later) set.

Signed-off-by: Julian Brown <julian@codesourcery.com>
Message-id: 11420d1c49636c1790e60578ee996e51f0f0b835.1484929304.git.julian@codesourcery.com
[PMM: use error_report_err() rather than error_report();
 move the integratorcp changes to their own patch;
 drop an unnecessary extra #include;
 rephrase commit message accordingly;
 move setting of reset_sctlr above registration of cpregs
 so it actually has an effect]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Julian Brown
00909b5858 hw/arm/integratorcp: Support specifying features via -cpu
Since the integratorcp board creates the CPU object directly
rather than via cpu_arm_init(), we have to call the CPU
class parse_features() method ourselves if we want to
support the user passing features via the -cpu command
line argument as well as just the cpu name. Do so.

Signed-off-by: Julian Brown <julian@codesourcery.com>
[PMM: split out into its own patch]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Prasad J Pandit
42922105be sd: sdhci: check data length during dma_memory_read
While doing a multi-block SDMA transfer in the routine
'sdhci_sdma_transfer_multi_blocks', the 's->fifo_buffer' starting
index 'begin' and the data length 's->data_count' could end up being the
same. This could lead to an OOB access issue. Correct the transfer data
length to avoid it.

Cc: qemu-stable@nongnu.org
Reported-by: Jiang Xin <jiangxin1@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20170130064736.9236-1-ppandit@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Cédric Le Goater
013befe1ca aspeed: add a watchdog controller
This enables reboot of a guest from U-Boot and Linux.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Message-id: 1485452251-1593-3-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Cédric Le Goater
854123bf8d wdt: Add Aspeed watchdog device model
The Aspeed SoC includes a set of watchdog timers using 32-bit
decrement counters, which can be based either on the APB clock or
a 1 MHz clock.

The watchdog timer is designed to prevent system deadlock and, in
general, should be restarted before it times out. When a timeout
occurs, different kinds of signals can be generated: ARM reset, SoC
reset, system reset, CPU interrupt, external signal, or boot from the
alternate block. The current model only implements the system reset,
as that is what U-Boot and Linux use.

Signed-off-by: Joel Stanley <joel@jms.id.au>
Message-id: 1485452251-1593-2-git-send-email-clg@kaod.org
[clg: - fixed compile breakage
      - fixed io region size
      - added watchdog_perform_action() on timer expiry
      - wrote a commit log
      - merged fixes from Andrew Jeffery to scale the reload value ]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
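Turning a 32-bit reload value and a clock selection into a timer deadline is the central arithmetic of a model like this. A standalone sketch; the APB frequency below is an assumed example value, not the real Aspeed rate:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NANOSECONDS_PER_SECOND 1000000000LL

/*
 * Expiry delay for a decrementing watchdog counter: 'reload' ticks of
 * either the 1 MHz reference clock or the APB clock.  The APB rate is
 * passed in by the caller and is only an example value here.
 */
static int64_t wdt_expiry_ns(uint32_t reload, bool use_1mhz_clock,
                             uint32_t apb_freq_hz)
{
    uint32_t rate = use_1mhz_clock ? 1000000u : apb_freq_hz;
    return (int64_t)reload * NANOSECONDS_PER_SECOND / rate;
}

int main(void)
{
    /* 0x4755 ticks of the 1 MHz clock is roughly 18.3 ms */
    printf("%lld ns\n", (long long)wdt_expiry_ns(0x4755, true, 24000000u));
    return 0;
}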
Pavel Dovgalyuk
26d3202207 integratorcp: adding vmstate for save/restore
The VMState added by this patch allows the integratorcp device
state to be saved and restored correctly.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170131114310.6768.79416.stgit@PASHA-ISP
[PMM: removed unnecessary minimum_version_id_old lines]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:58 +00:00
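For context, such a patch normally amounts to a VMStateDescription listing the registers to migrate. The sketch below follows QEMU's usual idiom but uses made-up device and field names and only builds inside the QEMU tree; it is not the actual integratorcp change:

#include "qemu/osdep.h"
#include "migration/vmstate.h"

/* Hypothetical device state; the real integratorcp fields differ. */
typedef struct ExampleSysctlState {
    uint32_t cm_osc;
    uint32_t cm_ctrl;
    uint32_t cm_lock;
} ExampleSysctlState;

static const VMStateDescription vmstate_example_sysctl = {
    .name = "example_sysctl",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(cm_osc, ExampleSysctlState),
        VMSTATE_UINT32(cm_ctrl, ExampleSysctlState),
        VMSTATE_UINT32(cm_lock, ExampleSysctlState),
        VMSTATE_END_OF_LIST()
    }
};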
477 changed files with 14210 additions and 5440 deletions

.gitignore (1 line changed)

@@ -107,6 +107,7 @@ docs/qemu-ga-ref.info*
docs/qemu-qmp-ref.info*
/qemu-ga-qapi.texi
/qemu-qapi.texi
/version.texi
*.tps
.stgit-*
cscope.*

.shippable.yml (new file, 19 lines)

@@ -0,0 +1,19 @@
language: c
env:
matrix:
- IMAGE=debian-armhf-cross
TARGET_LIST=arm-softmmu,arm-linux-user
- IMAGE=debian-arm64-cross
TARGET_LIST=aarch64-softmmu,aarch64-linux-user
build:
pre_ci:
- make docker-image-${IMAGE}
pre_ci_boot:
image_name: qemu
image_tag: ${IMAGE}
pull: false
options: "-e HOME=/root"
ci:
- unset CC
- ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
- make -j2

View File

@@ -92,8 +92,8 @@ matrix:
- env: CONFIG=""
os: osx
compiler: clang
# Plain Trusty Build
- env: CONFIG=""
# Plain Trusty System Build
- env: CONFIG="--disable-linux-user"
sudo: required
addons:
dist: trusty
@@ -103,16 +103,45 @@ matrix:
- sudo apt-get build-dep -qq qemu
- wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
- git submodule update --init --recursive
# Trusty build with latest stable clang
- env: CONFIG=""
# Plain Trusty Linux User Build
- env: CONFIG="--disable-system"
sudo: required
addons:
dist: trusty
compiler: gcc
before_install:
- sudo apt-get update -qq
- sudo apt-get build-dep -qq qemu
- wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
- git submodule update --init --recursive
# Trusty System build with latest stable clang
- sudo: required
addons:
dist: trusty
language: generic
compiler: none
env:
- COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
- CONFIG="--cc=clang-3.9 --cxx=clang++-3.9"
- CONFIG="--disable-linux-user --cc=clang-3.9 --cxx=clang++-3.9"
before_install:
- wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
- sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
- sudo apt-get update -qq
- sudo apt-get install -qq -y clang-3.9
- sudo apt-get build-dep -qq qemu
- wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
- git submodule update --init --recursive
before_script:
- ./configure ${CONFIG} || cat config.log
# Trusty Linux User build with latest stable clang
- sudo: required
addons:
dist: trusty
language: generic
compiler: none
env:
- COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
- CONFIG="--disable-system --cc=clang-3.9 --cxx=clang++-3.9"
before_install:
- wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
- sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'

View File

@@ -561,20 +561,19 @@ F: hw/lm32/milkymist.c
M68K Machines
-------------
an5206
S: Orphan
M: Thomas Huth <huth@tuxfamily.org>
S: Odd Fixes
F: hw/m68k/an5206.c
F: hw/m68k/mcf5206.c
dummy_m68k
S: Orphan
F: hw/m68k/dummy_m68k.c
mcf5208
S: Orphan
M: Thomas Huth <huth@tuxfamily.org>
S: Odd Fixes
F: hw/m68k/mcf5208.c
F: hw/m68k/mcf_intc.c
F: hw/char/mcf_uart.c
F: hw/net/mcf_fec.c
F: include/hw/m68k/mcf*.h
MicroBlaze Machines
-------------------
@@ -1034,7 +1033,7 @@ F: hw/input/virtio-input*.c
F: include/hw/virtio/virtio-input.h
virtio-serial
M: Amit Shah <amit.shah@redhat.com>
M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
@@ -1043,7 +1042,7 @@ F: tests/virtio-console-test.c
F: tests/virtio-serial-test.c
virtio-rng
M: Amit Shah <amit.shah@redhat.com>
M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/virtio/virtio-rng.c
F: include/hw/virtio/virtio-rng.h
@@ -1431,7 +1430,6 @@ F: scripts/checkpatch.pl
Migration
M: Juan Quintela <quintela@redhat.com>
M: Amit Shah <amit.shah@redhat.com>
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
S: Maintained
F: include/migration/
@@ -1802,9 +1800,14 @@ F: docs/block-replication.txt
Build and test automation
-------------------------
M: Alex Bennée <alex.bennee@linaro.org>
M: Fam Zheng <famz@redhat.com>
L: qemu-devel@nongnu.org
S: Supported
S: Maintained
F: .travis.yml
F: .shippable.yml
F: tests/docker/
W: https://travis-ci.org/qemu/qemu
W: http://patchew.org/QEMU/
Documentation
-------------
@@ -1813,9 +1816,3 @@ M: Daniel P. Berrange <berrange@redhat.com>
S: Odd Fixes
F: docs/build-system.txt
Docker testing
--------------
Docker based testing framework and cases
M: Fam Zheng <famz@redhat.com>
S: Maintained
F: tests/docker/

View File

@@ -299,7 +299,11 @@ qemu-version.h: FORCE
printf '""\n'; \
fi; \
fi) > $@.tmp)
$(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)
$(call quiet-command, if ! cmp -s $@ $@.tmp; then \
mv $@.tmp $@; \
else \
rm $@.tmp; \
fi)
config-host.h: config-host.h-timestamp
config-host.h-timestamp: config-host.mak
@@ -512,7 +516,7 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
rm -f qemu-ga-qapi.texi qemu-qapi.texi
rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -589,7 +593,7 @@ endif
endif
install: all $(if $(BUILD_DOCS),install-doc) \
install: all $(if $(BUILD_DOCS),install-doc) $(BUILD_DIR)/trace-events-all \
install-datadir install-localstatedir
ifneq ($(TOOLS),)
$(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
@@ -659,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
# documentation
MAKEINFO=makeinfo
MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
MAKEINFOFLAGS=--no-split --number-sections
TEXIFLAG=$(if $(V),,--quiet)
%.html: %.texi
version.texi: $(SRC_PATH)/VERSION
$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
%.html: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--html $< -o $@,"GEN","$@")
%.info: %.texi
%.info: %.texi version.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
%.txt: %.texi
%.txt: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--plaintext $< -o $@,"GEN","$@")
%.pdf: %.texi
%.pdf: %.texi version.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")
qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool

View File

@@ -9,12 +9,8 @@ chardev-obj-y = chardev/
#######################################################################
# block-obj-y is code used by both qemu system emulation and qemu-img
block-obj-y = async.o thread-pool.o
block-obj-y += nbd/
block-obj-y += block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
block-obj-$(CONFIG_REPLICATION) += replication.o
@@ -125,6 +121,7 @@ trace-events-subdirs += crypto
trace-events-subdirs += io
trace-events-subdirs += migration
trace-events-subdirs += block
trace-events-subdirs += backends
trace-events-subdirs += hw/block
trace-events-subdirs += hw/block/dataplane
trace-events-subdirs += hw/char

View File

@@ -1,7 +1,7 @@
common-obj-y += rng.o rng-egd.o
common-obj-$(CONFIG_POSIX) += rng-random.o
common-obj-y += msmouse.o testdev.o
common-obj-y += msmouse.o wctablet.o testdev.o
common-obj-$(CONFIG_BRLAPI) += baum.o
baum.o-cflags := $(SDL_CFLAGS)

backends/trace-events (new file, 10 lines)

@@ -0,0 +1,10 @@
# See docs/tracing.txt for syntax documentation.
# backends/wctablet.c
wct_init(void) ""
wct_cmd_re(void) ""
wct_cmd_st(void) ""
wct_cmd_sp(void) ""
wct_cmd_ts(int input) "0x%02x"
wct_cmd_other(const char *cmd) "%s"
wct_speed(int speed) "%d"

backends/wctablet.c (new file, 369 lines)

@@ -0,0 +1,369 @@
/*
* QEMU Wacom Penpartner serial tablet emulation
*
* some protocol details:
* http://linuxwacom.sourceforge.net/wiki/index.php/Serial_Protocol_IV
*
* Copyright (c) 2016 Anatoli Huseu1
* Copyright (c) 2016,17 Gerd Hoffmann
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "ui/console.h"
#include "ui/input.h"
#include "trace.h"
#define WC_OUTPUT_BUF_MAX_LEN 512
#define WC_COMMAND_MAX_LEN 60
#define WC_L7(n) ((n) & 127)
#define WC_M7(n) (((n) >> 7) & 127)
#define WC_H2(n) ((n) >> 14)
#define WC_L4(n) ((n) & 15)
#define WC_H4(n) (((n) >> 4) & 15)
/* Model string and config string */
#define WC_MODEL_STRING_LENGTH 18
uint8_t WC_MODEL_STRING[WC_MODEL_STRING_LENGTH + 1] = "~#CT-0045R,V1.3-5,";
#define WC_CONFIG_STRING_LENGTH 8
uint8_t WC_CONFIG_STRING[WC_CONFIG_STRING_LENGTH + 1] = "96,N,8,0";
#define WC_FULL_CONFIG_STRING_LENGTH 61
uint8_t WC_FULL_CONFIG_STRING[WC_FULL_CONFIG_STRING_LENGTH + 1] = {
0x5c, 0x39, 0x36, 0x2c, 0x4e, 0x2c, 0x38, 0x2c,
0x31, 0x28, 0x01, 0x24, 0x57, 0x41, 0x43, 0x30,
0x30, 0x34, 0x35, 0x5c, 0x5c, 0x50, 0x45, 0x4e, 0x5c,
0x57, 0x41, 0x43, 0x30, 0x30, 0x30, 0x30, 0x5c,
0x54, 0x61, 0x62, 0x6c, 0x65, 0x74, 0x0d, 0x0a,
0x43, 0x54, 0x2d, 0x30, 0x30, 0x34, 0x35, 0x52,
0x2c, 0x56, 0x31, 0x2e, 0x33, 0x2d, 0x35, 0x0d,
0x0a, 0x45, 0x37, 0x29
};
/* This structure is used to save private info for Wacom Tablet. */
typedef struct {
Chardev parent;
QemuInputHandlerState *hs;
/* Query string from serial */
uint8_t query[100];
int query_index;
/* Command to be sent to serial port */
uint8_t outbuf[WC_OUTPUT_BUF_MAX_LEN];
int outlen;
int line_speed;
bool send_events;
int axis[INPUT_AXIS__MAX];
bool btns[INPUT_BUTTON__MAX];
} TabletChardev;
#define TYPE_CHARDEV_WCTABLET "chardev-wctablet"
#define WCTABLET_CHARDEV(obj) \
OBJECT_CHECK(TabletChardev, (obj), TYPE_CHARDEV_WCTABLET)
static void wctablet_chr_accept_input(Chardev *chr);
static void wctablet_shift_input(TabletChardev *tablet, int count)
{
tablet->query_index -= count;
memmove(tablet->query, tablet->query + count, tablet->query_index);
tablet->query[tablet->query_index] = 0;
}
static void wctablet_queue_output(TabletChardev *tablet, uint8_t *buf, int count)
{
if (tablet->outlen + count > sizeof(tablet->outbuf)) {
return;
}
memcpy(tablet->outbuf + tablet->outlen, buf, count);
tablet->outlen += count;
wctablet_chr_accept_input(CHARDEV(tablet));
}
static void wctablet_reset(TabletChardev *tablet)
{
/* clear buffers */
tablet->query_index = 0;
tablet->outlen = 0;
/* reset state */
tablet->send_events = false;
}
static void wctablet_queue_event(TabletChardev *tablet)
{
uint8_t codes[8] = { 0xe0, 0, 0, 0, 0, 0, 0 };
if (tablet->line_speed != 9600) {
return;
}
int newX = tablet->axis[INPUT_AXIS_X] * 0.1537;
int nexY = tablet->axis[INPUT_AXIS_Y] * 0.1152;
codes[0] = codes[0] | WC_H2(newX);
codes[1] = codes[1] | WC_M7(newX);
codes[2] = codes[2] | WC_L7(newX);
codes[3] = codes[3] | WC_H2(nexY);
codes[4] = codes[4] | WC_M7(nexY);
codes[5] = codes[5] | WC_L7(nexY);
if (tablet->btns[INPUT_BUTTON_LEFT]) {
codes[0] = 0xa0;
}
wctablet_queue_output(tablet, codes, 7);
}
static void wctablet_input_event(DeviceState *dev, QemuConsole *src,
InputEvent *evt)
{
TabletChardev *tablet = (TabletChardev *)dev;
InputMoveEvent *move;
InputBtnEvent *btn;
switch (evt->type) {
case INPUT_EVENT_KIND_ABS:
move = evt->u.abs.data;
tablet->axis[move->axis] = move->value;
break;
case INPUT_EVENT_KIND_BTN:
btn = evt->u.btn.data;
tablet->btns[btn->button] = btn->down;
break;
default:
/* keep gcc happy */
break;
}
}
static void wctablet_input_sync(DeviceState *dev)
{
TabletChardev *tablet = (TabletChardev *)dev;
if (tablet->send_events) {
wctablet_queue_event(tablet);
}
}
static QemuInputHandler wctablet_handler = {
.name = "QEMU Wacome Pen Tablet",
.mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS,
.event = wctablet_input_event,
.sync = wctablet_input_sync,
};
static void wctablet_chr_accept_input(Chardev *chr)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
int len, canWrite;
canWrite = qemu_chr_be_can_write(chr);
len = canWrite;
if (len > tablet->outlen) {
len = tablet->outlen;
}
if (len) {
qemu_chr_be_write(chr, tablet->outbuf, len);
tablet->outlen -= len;
if (tablet->outlen) {
memmove(tablet->outbuf, tablet->outbuf + len, tablet->outlen);
}
}
}
static int wctablet_chr_write(struct Chardev *chr,
const uint8_t *buf, int len)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
unsigned int i, clen;
char *pos;
if (tablet->line_speed != 9600) {
return len;
}
for (i = 0; i < len && tablet->query_index < sizeof(tablet->query) - 1; i++) {
tablet->query[tablet->query_index++] = buf[i];
}
tablet->query[tablet->query_index] = 0;
while (tablet->query_index > 0 && (tablet->query[0] == '@' ||
tablet->query[0] == '\r' ||
tablet->query[0] == '\n')) {
wctablet_shift_input(tablet, 1);
}
if (!tablet->query_index) {
return len;
}
if (strncmp((char *)tablet->query, "~#", 2) == 0) {
/* init / detect sequence */
trace_wct_init();
wctablet_shift_input(tablet, 2);
wctablet_queue_output(tablet, WC_MODEL_STRING,
WC_MODEL_STRING_LENGTH);
return len;
}
/* detect line */
pos = strchr((char *)tablet->query, '\r');
if (!pos) {
pos = strchr((char *)tablet->query, '\n');
}
if (!pos) {
return len;
}
clen = pos - (char *)tablet->query;
/* process commands */
if (strncmp((char *)tablet->query, "RE", 2) == 0 &&
clen == 2) {
trace_wct_cmd_re();
wctablet_shift_input(tablet, 3);
wctablet_queue_output(tablet, WC_CONFIG_STRING,
WC_CONFIG_STRING_LENGTH);
} else if (strncmp((char *)tablet->query, "ST", 2) == 0 &&
clen == 2) {
trace_wct_cmd_st();
wctablet_shift_input(tablet, 3);
tablet->send_events = true;
wctablet_queue_event(tablet);
} else if (strncmp((char *)tablet->query, "SP", 2) == 0 &&
clen == 2) {
trace_wct_cmd_sp();
wctablet_shift_input(tablet, 3);
tablet->send_events = false;
} else if (strncmp((char *)tablet->query, "TS", 2) == 0 &&
clen == 3) {
unsigned int input = tablet->query[2];
uint8_t codes[7] = {
0xa3,
((input & 0x80) == 0) ? 0x7e : 0x7f,
(((WC_H4(input) & 0x7) ^ 0x5) << 4) | (WC_L4(input) ^ 0x7),
0x03,
0x7f,
0x7f,
0x00,
};
trace_wct_cmd_ts(input);
wctablet_shift_input(tablet, 4);
wctablet_queue_output(tablet, codes, 7);
} else {
tablet->query[clen] = 0; /* terminate line for printing */
trace_wct_cmd_other((char *)tablet->query);
wctablet_shift_input(tablet, clen + 1);
}
return len;
}
static int wctablet_chr_ioctl(Chardev *chr, int cmd, void *arg)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
QEMUSerialSetParams *ssp;
switch (cmd) {
case CHR_IOCTL_SERIAL_SET_PARAMS:
ssp = arg;
if (tablet->line_speed != ssp->speed) {
trace_wct_speed(ssp->speed);
wctablet_reset(tablet);
tablet->line_speed = ssp->speed;
}
break;
default:
return -ENOTSUP;
}
return 0;
}
static void wctablet_chr_finalize(Object *obj)
{
TabletChardev *tablet = WCTABLET_CHARDEV(obj);
qemu_input_handler_unregister(tablet->hs);
g_free(tablet);
}
static void wctablet_chr_open(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
*be_opened = true;
/* init state machine */
memcpy(tablet->outbuf, WC_FULL_CONFIG_STRING, WC_FULL_CONFIG_STRING_LENGTH);
tablet->outlen = WC_FULL_CONFIG_STRING_LENGTH;
tablet->query_index = 0;
tablet->hs = qemu_input_handler_register((DeviceState *)tablet,
&wctablet_handler);
}
static void wctablet_chr_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->open = wctablet_chr_open;
cc->chr_write = wctablet_chr_write;
cc->chr_ioctl = wctablet_chr_ioctl;
cc->chr_accept_input = wctablet_chr_accept_input;
}
static const TypeInfo wctablet_type_info = {
.name = TYPE_CHARDEV_WCTABLET,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(TabletChardev),
.instance_finalize = wctablet_chr_finalize,
.class_init = wctablet_chr_class_init,
};
static void register_types(void)
{
type_register_static(&wctablet_type_info);
}
type_init(register_types);

block.c (293 lines changed)

@@ -588,21 +588,20 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
return drv;
}
static int find_image_format(BdrvChild *file, const char *filename,
static int find_image_format(BlockBackend *file, const char *filename,
BlockDriver **pdrv, Error **errp)
{
BlockDriverState *bs = file->bs;
BlockDriver *drv;
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
int ret = 0;
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
*pdrv = &bdrv_raw;
return ret;
}
ret = bdrv_pread(file, 0, buf, sizeof(buf));
ret = blk_pread(file, 0, buf, sizeof(buf));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read image for determining its "
"format");
@@ -926,6 +925,95 @@ out:
g_free(gen_node_name);
}
static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
const char *node_name, QDict *options,
int open_flags, Error **errp)
{
Error *local_err = NULL;
int ret;
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
}
bs->drv = drv;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
bs->opaque = g_malloc0(drv->instance_size);
if (drv->bdrv_file_open) {
assert(!drv->bdrv_needs_filename || bs->filename[0]);
ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
} else if (drv->bdrv_open) {
ret = drv->bdrv_open(bs, options, open_flags, &local_err);
} else {
ret = 0;
}
if (ret < 0) {
if (local_err) {
error_propagate(errp, local_err);
} else if (bs->filename[0]) {
error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
} else {
error_setg_errno(errp, -ret, "Could not open image");
}
goto free_and_fail;
}
ret = refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
goto free_and_fail;
}
bdrv_refresh_limits(bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto free_and_fail;
}
assert(bdrv_opt_mem_align(bs) != 0);
assert(bdrv_min_mem_align(bs) != 0);
assert(is_power_of_2(bs->bl.request_alignment));
return 0;
free_and_fail:
/* FIXME Close bs first if already opened*/
g_free(bs->opaque);
bs->opaque = NULL;
bs->drv = NULL;
return ret;
}
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp)
{
BlockDriverState *bs;
int ret;
bs = bdrv_new();
bs->open_flags = flags;
bs->explicit_options = qdict_new();
bs->options = qdict_new();
bs->opaque = NULL;
update_options_from_flags(bs->options, flags);
ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
if (ret < 0) {
QDECREF(bs->explicit_options);
QDECREF(bs->options);
bdrv_unref(bs);
return NULL;
}
return bs;
}
QemuOptsList bdrv_runtime_opts = {
.name = "bdrv_common",
.head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
@@ -974,7 +1062,7 @@ QemuOptsList bdrv_runtime_opts = {
*
* Removes all processed options from *options.
*/
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
QDict *options, Error **errp)
{
int ret, open_flags;
@@ -1005,7 +1093,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
assert(drv != NULL);
if (file != NULL) {
filename = file->bs->filename;
filename = blk_bs(file)->filename;
} else {
filename = qdict_get_try_str(options, "filename");
}
@@ -1020,14 +1108,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
drv->format_name);
node_name = qemu_opt_get(opts, "node-name");
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail_opts;
}
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
@@ -1093,62 +1173,19 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
}
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
bs->drv = drv;
bs->opaque = g_malloc0(drv->instance_size);
/* Open the image, either directly or using a protocol */
open_flags = bdrv_open_flags(bs, bs->open_flags);
if (drv->bdrv_file_open) {
assert(file == NULL);
assert(!drv->bdrv_needs_filename || filename != NULL);
ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
} else {
if (file == NULL) {
error_setg(errp, "Can't use '%s' as a block driver for the "
"protocol level", drv->format_name);
ret = -EINVAL;
goto free_and_fail;
}
bs->file = file;
ret = drv->bdrv_open(bs, options, open_flags, &local_err);
}
node_name = qemu_opt_get(opts, "node-name");
assert(!drv->bdrv_file_open || file == NULL);
ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
if (ret < 0) {
if (local_err) {
error_propagate(errp, local_err);
} else if (bs->filename[0]) {
error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
} else {
error_setg_errno(errp, -ret, "Could not open image");
}
goto free_and_fail;
goto fail_opts;
}
ret = refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
goto free_and_fail;
}
bdrv_refresh_limits(bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto free_and_fail;
}
assert(bdrv_opt_mem_align(bs) != 0);
assert(bdrv_min_mem_align(bs) != 0);
assert(is_power_of_2(bs->bl.request_alignment));
qemu_opts_del(opts);
return 0;
free_and_fail:
bs->file = NULL;
g_free(bs->opaque);
bs->opaque = NULL;
bs->drv = NULL;
fail_opts:
qemu_opts_del(opts);
return ret;
@@ -1169,13 +1206,13 @@ static QDict *parse_json_filename(const char *filename, Error **errp)
return NULL;
}
if (qobject_type(options_obj) != QTYPE_QDICT) {
options = qobject_to_qdict(options_obj);
if (!options) {
qobject_decref(options_obj);
error_setg(errp, "Invalid JSON object given");
return NULL;
}
options = qobject_to_qdict(options_obj);
qdict_flatten(options);
return options;
@@ -1368,7 +1405,18 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
}
if (child->bs->inherits_from == parent) {
child->bs->inherits_from = NULL;
BdrvChild *c;
/* Remove inherits_from only when the last reference between parent and
* child->bs goes away. */
QLIST_FOREACH(c, &parent->children, next) {
if (c != child && c->bs == child->bs) {
break;
}
}
if (c == NULL) {
child->bs->inherits_from = NULL;
}
}
bdrv_root_unref_child(child);
@@ -1543,28 +1591,12 @@ free_exit:
return ret;
}
/*
* Opens a disk image whose options are given as BlockdevRef in another block
* device's options.
*
* If allow_none is true, no image will be opened if filename is false and no
* BlockdevRef is given. NULL will be returned, but errp remains unset.
*
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
* itself, all options starting with "${bdref_key}." are considered part of the
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
*/
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
static BlockDriverState *
bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
BlockDriverState *parent, const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BdrvChild *c = NULL;
BlockDriverState *bs;
BlockDriverState *bs = NULL;
QDict *image_options;
char *bdref_key_dot;
const char *reference;
@@ -1591,11 +1623,40 @@ BdrvChild *bdrv_open_child(const char *filename,
goto done;
}
c = bdrv_attach_child(parent, bs, bdref_key, child_role);
done:
qdict_del(options, bdref_key);
return c;
return bs;
}
/*
* Opens a disk image whose options are given as BlockdevRef in another block
* device's options.
*
* If allow_none is true, no image will be opened if filename is false and no
* BlockdevRef is given. NULL will be returned, but errp remains unset.
*
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
* itself, all options starting with "${bdref_key}." are considered part of the
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
*/
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState *parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
allow_none, errp);
if (bs == NULL) {
return NULL;
}
return bdrv_attach_child(parent, bs, bdref_key, child_role);
}
static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
@@ -1691,7 +1752,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
Error **errp)
{
int ret;
BdrvChild *file = NULL;
BlockBackend *file = NULL;
BlockDriverState *bs;
BlockDriver *drv = NULL;
const char *drvname;
@@ -1789,13 +1850,25 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
qdict_del(options, "backing");
}
/* Open image file without format layer */
/* Open image file without format layer. This BlockBackend is only used for
* probing, the block drivers will do their own bdrv_open_child() for the
* same BDS, which is why we put the node name back into options. */
if ((flags & BDRV_O_PROTOCOL) == 0) {
file = bdrv_open_child(filename, options, "file", bs,
&child_file, true, &local_err);
BlockDriverState *file_bs;
file_bs = bdrv_open_child_bs(filename, options, "file", bs,
&child_file, true, &local_err);
if (local_err) {
goto fail;
}
if (file_bs != NULL) {
file = blk_new();
blk_insert_bs(file, file_bs);
bdrv_unref(file_bs);
qdict_put(options, "file",
qstring_from_str(bdrv_get_node_name(file_bs)));
}
}
/* Image format probing */
@@ -1835,8 +1908,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto fail;
}
if (file && (bs->file != file)) {
bdrv_unref_child(bs, file);
if (file) {
blk_unref(file);
file = NULL;
}
@@ -1898,8 +1971,9 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
return bs;
fail:
if (file != NULL) {
bdrv_unref_child(bs, file);
blk_unref(file);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
}
QDECREF(snapshot_options);
QDECREF(bs->explicit_options);
@@ -2626,8 +2700,9 @@ exit:
/**
* Truncate file to 'offset' bytes (needed only for file protocols)
*/
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
int bdrv_truncate(BdrvChild *child, int64_t offset)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int ret;
if (!drv)
@@ -3145,6 +3220,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
int is_protocol = 0;
BlockDriverState *curr_bs = NULL;
BlockDriverState *retval = NULL;
Error *local_error = NULL;
if (!bs || !bs->drv || !backing_file) {
return NULL;
@@ -3165,6 +3241,18 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
retval = curr_bs->backing->bs;
break;
}
/* Also check against the full backing filename for the image */
bdrv_get_full_backing_filename(curr_bs, backing_file_full, PATH_MAX,
&local_error);
if (local_error == NULL) {
if (strcmp(backing_file, backing_file_full) == 0) {
retval = curr_bs->backing->bs;
break;
}
} else {
error_free(local_error);
local_error = NULL;
}
} else {
/* If not an absolute filename path, make it relative to the current
* image's filename path */
@@ -3235,19 +3323,18 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
if (!(bs->open_flags & BDRV_O_INACTIVE)) {
return;
}
bs->open_flags &= ~BDRV_O_INACTIVE;
if (bs->drv->bdrv_invalidate_cache) {
bs->drv->bdrv_invalidate_cache(bs, &local_err);
QLIST_FOREACH(child, &bs->children, next) {
bdrv_invalidate_cache(child->bs, &local_err);
if (local_err) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);
return;
}
}
QLIST_FOREACH(child, &bs->children, next) {
bdrv_invalidate_cache(child->bs, &local_err);
bs->open_flags &= ~BDRV_O_INACTIVE;
if (bs->drv->bdrv_invalidate_cache) {
bs->drv->bdrv_invalidate_cache(bs, &local_err);
if (local_err) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);

View File

@@ -64,7 +64,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
retry = false;
QLIST_FOREACH(req, &job->inflight_reqs, list) {
if (end > req->start && start < req->end) {
qemu_co_queue_wait(&req->wait_queue);
qemu_co_queue_wait(&req->wait_queue, NULL);
retry = true;
break;
}

View File

@@ -405,12 +405,6 @@ out:
return ret;
}
static void error_callback_bh(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co);
}
static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
{
BDRVBlkdebugState *s = bs->opaque;
@@ -423,8 +417,7 @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
}
if (!immediately) {
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
qemu_coroutine_self());
aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
qemu_coroutine_yield();
}
@@ -670,7 +663,7 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)

View File

@@ -60,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
static void blkreplay_bh_cb(void *opaque)
{
Request *req = opaque;
qemu_coroutine_enter(req->co);
aio_co_wake(req->co);
qemu_bh_delete(req->bh);
g_free(req);
}

View File

@@ -880,7 +880,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
{
QEMUIOVector qiov;
struct iovec iov;
Coroutine *co;
BlkRwCo rwco;
iov = (struct iovec) {
@@ -897,9 +896,14 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
co_entry(&rwco);
} else {
Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
return rwco.ret;
}
@@ -979,7 +983,6 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
static void blk_aio_complete_bh(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
assert(acb->has_returned);
blk_aio_complete(acb);
}
@@ -1602,7 +1605,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
return -ENOMEDIUM;
}
return bdrv_truncate(blk_bs(blk), offset);
return bdrv_truncate(blk->root, offset);
}
static void blk_pdiscard_entry(void *opaque)

View File

@@ -104,6 +104,12 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
struct bochs_header bochs;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->read_only = true; /* no write support yet */
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));

View File

@@ -66,6 +66,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t offsets_size, max_compressed_block_size = 1, i;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->read_only = true;
/* read header */

View File

@@ -300,6 +300,12 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
QCryptoBlockOpenOptions *open_opts = NULL;
unsigned int cflags = 0;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -383,7 +389,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
offset += payload_offset;
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static void block_crypto_close(BlockDriverState *bs)

View File

@@ -386,9 +386,8 @@ static void curl_multi_check_completion(BDRVCURLState *s)
}
}
static void curl_multi_do(void *arg)
static void curl_multi_do_locked(CURLState *s)
{
CURLState *s = (CURLState *)arg;
CURLSocket *socket, *next_socket;
int running;
int r;
@@ -406,12 +405,23 @@ static void curl_multi_do(void *arg)
}
}
static void curl_multi_do(void *arg)
{
CURLState *s = (CURLState *)arg;
aio_context_acquire(s->s->aio_context);
curl_multi_do_locked(s);
aio_context_release(s->s->aio_context);
}
static void curl_multi_read(void *arg)
{
CURLState *s = (CURLState *)arg;
curl_multi_do(arg);
aio_context_acquire(s->s->aio_context);
curl_multi_do_locked(s);
curl_multi_check_completion(s->s);
aio_context_release(s->s->aio_context);
}
static void curl_multi_timeout_do(void *arg)
@@ -424,9 +434,11 @@ static void curl_multi_timeout_do(void *arg)
return;
}
aio_context_acquire(s->aio_context);
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_check_completion(s);
aio_context_release(s->aio_context);
#else
abort();
#endif
@@ -784,13 +796,18 @@ static void curl_readv_bh_cb(void *p)
{
CURLState *state;
int running;
int ret = -EINPROGRESS;
CURLAIOCB *acb = p;
BDRVCURLState *s = acb->common.bs->opaque;
BlockDriverState *bs = acb->common.bs;
BDRVCURLState *s = bs->opaque;
AioContext *ctx = bdrv_get_aio_context(bs);
size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
size_t end;
aio_context_acquire(ctx);
// In case we have the requested data already (e.g. read-ahead),
// we can just call the callback and be done.
switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
@@ -798,7 +815,7 @@ static void curl_readv_bh_cb(void *p)
qemu_aio_unref(acb);
// fall through
case FIND_RET_WAIT:
return;
goto out;
default:
break;
}
@@ -806,9 +823,8 @@ static void curl_readv_bh_cb(void *p)
// No cache found, so let's start a new request
state = curl_init_state(acb->common.bs, s);
if (!state) {
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_unref(acb);
return;
ret = -EIO;
goto out;
}
acb->start = 0;
@@ -822,9 +838,8 @@ static void curl_readv_bh_cb(void *p)
state->orig_buf = g_try_malloc(state->buf_len);
if (state->buf_len && state->orig_buf == NULL) {
curl_clean_state(state);
acb->common.cb(acb->common.opaque, -ENOMEM);
qemu_aio_unref(acb);
return;
ret = -ENOMEM;
goto out;
}
state->acb[0] = acb;
@@ -837,6 +852,13 @@ static void curl_readv_bh_cb(void *p)
/* Tell curl it needs to kick things off */
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
out:
aio_context_release(ctx);
if (ret != -EINPROGRESS) {
acb->common.cb(acb->common.opaque, ret);
qemu_aio_unref(acb);
}
}
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,

View File

@@ -413,6 +413,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
int64_t offset;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
block_module_load_one("dmg-bz2");
bs->read_only = true;

View File

@@ -1591,18 +1591,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
#endif
}
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
goto out_close;
}
switch (prealloc) {
#ifdef CONFIG_POSIX_FALLOCATE
case PREALLOC_MODE_FALLOC:
/* posix_fallocate() doesn't set errno. */
/*
* Truncating before posix_fallocate() makes it about twice slower on
* file systems that do not support fallocate(), trying to check if a
* block is allocated before allocating it, so don't do that here.
*/
result = -posix_fallocate(fd, 0, total_size);
if (result != 0) {
/* posix_fallocate() doesn't set errno. */
error_setg_errno(errp, -result,
"Could not preallocate data for the new file");
}
@@ -1610,6 +1609,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
#endif
case PREALLOC_MODE_FULL:
{
/*
* Knowing the final size from the beginning could allow the file
* system driver to do less allocations and possibly avoid
* fragmentation of the file.
*/
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
goto out_close;
}
int64_t num = 0, left = total_size;
buf = g_malloc0(65536);
@@ -1636,6 +1646,10 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
break;
}
case PREALLOC_MODE_OFF:
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
break;
default:
result = -EINVAL;

View File

@@ -698,13 +698,6 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
return qemu_gluster_glfs_init(gconf, errp);
}
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
qemu_coroutine_enter(acb->coroutine);
}
/*
* AIO callback routine called from GlusterFS thread.
*/
@@ -720,7 +713,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
acb->ret = -EIO; /* Partial read/write - fail it */
}
aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
aio_co_schedule(acb->aio_context, acb->coroutine);
}
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)

View File

@@ -189,7 +189,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
bdrv_dec_in_flight(bs);
bdrv_drained_begin(bs);
data->done = true;
qemu_coroutine_enter(co);
aio_co_wake(co);
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
@@ -539,7 +539,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
qemu_co_queue_wait(&req->wait_queue);
qemu_co_queue_wait(&req->wait_queue, NULL);
self->waiting_for = NULL;
retry = true;
waited = true;
@@ -813,7 +813,7 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
CoroutineIOCompletion *co = opaque;
co->ret = ret;
qemu_coroutine_enter(co->coroutine);
aio_co_wake(co->coroutine);
}
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
@@ -2080,6 +2080,11 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
if (acb->aiocb_info->get_aio_context) {
aio_poll(acb->aiocb_info->get_aio_context(acb), true);
} else if (acb->bs) {
/* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
* assert that we're not using an I/O thread. Thread-safe
* code should use bdrv_aio_cancel_async exclusively.
*/
assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
aio_poll(bdrv_get_aio_context(acb->bs), true);
} else {
abort();
@@ -2239,35 +2244,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
return &acb->common;
}
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
BlockAIOCB *acb;
acb = g_malloc(aiocb_info->aiocb_size);
acb->aiocb_info = aiocb_info;
acb->bs = bs;
acb->cb = cb;
acb->opaque = opaque;
acb->refcnt = 1;
return acb;
}
void qemu_aio_ref(void *p)
{
BlockAIOCB *acb = p;
acb->refcnt++;
}
void qemu_aio_unref(void *p)
{
BlockAIOCB *acb = p;
assert(acb->refcnt > 0);
if (--acb->refcnt == 0) {
g_free(acb);
}
}
/**************************************************************/
/* Coroutine block device emulation */
@@ -2299,7 +2275,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
qemu_co_queue_wait(&bs->flush_queue);
qemu_co_queue_wait(&bs->flush_queue, NULL);
}
bs->active_flush_req = true;

View File

@@ -165,8 +165,9 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
static void iscsi_co_generic_bh_cb(void *opaque)
{
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -174,7 +175,7 @@ static void iscsi_retry_timer_expired(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
if (iTask->co) {
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
}
@@ -394,8 +395,10 @@ iscsi_process_read(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
aio_context_acquire(iscsilun->aio_context);
iscsi_service(iscsi, POLLIN);
iscsi_set_events(iscsilun);
aio_context_release(iscsilun->aio_context);
}
static void
@@ -404,8 +407,10 @@ iscsi_process_write(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
aio_context_acquire(iscsilun->aio_context);
iscsi_service(iscsi, POLLOUT);
iscsi_set_events(iscsilun);
aio_context_release(iscsilun->aio_context);
}
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -1240,29 +1245,14 @@ retry:
return 0;
}
static void parse_chap(struct iscsi_context *iscsi, const char *target,
static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
const char *secretid;
char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (!user) {
return;
@@ -1293,64 +1283,36 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *digest = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
digest = qemu_opt_get(opts, "header-digest");
if (!digest) {
return;
}
if (!strcmp(digest, "CRC32C")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else if (!strcmp(digest, "crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
} else if (!strcmp(digest, "NONE")) {
} else if (!strcmp(digest, "none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
} else if (!strcmp(digest, "CRC32C-NONE")) {
} else if (!strcmp(digest, "crc32c-none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
} else if (!strcmp(digest, "NONE-CRC32C")) {
} else if (!strcmp(digest, "none-crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else {
error_setg(errp, "Invalid header-digest setting : %s", digest);
}
}
static char *parse_initiator_name(const char *target)
static char *get_initiator_name(QemuOpts *opts)
{
QemuOptsList *list;
QemuOpts *opts;
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
}
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
uuid_info = qmp_query_uuid(NULL);
@@ -1365,43 +1327,24 @@ static char *parse_initiator_name(const char *target)
return iscsi_name;
}
static int parse_timeout(const char *target)
{
QemuOptsList *list;
QemuOpts *opts;
const char *timeout;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
return atoi(timeout);
}
}
}
return 0;
}
static void iscsi_nop_timed_event(void *opaque)
{
IscsiLun *iscsilun = opaque;
aio_context_acquire(iscsilun->aio_context);
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
error_report("iSCSI: NOP timeout. Reconnecting...");
iscsilun->request_timed_out = true;
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
return;
goto out;
}
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
iscsi_set_events(iscsilun);
out:
aio_context_release(iscsilun->aio_context);
}
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1474,20 +1417,6 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
}
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the iscsi image",
},
{ /* end of list */ }
},
};
static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
int evpd, int pc, void **inq, Error **errp)
{
@@ -1605,24 +1534,178 @@ out:
}
}
static void iscsi_parse_iscsi_option(const char *target, QDict *options)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user, *password, *password_secret, *initiator_name,
*header_digest, *timeout;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (user) {
qdict_set_default_str(options, "user", user);
}
password = qemu_opt_get(opts, "password");
if (password) {
qdict_set_default_str(options, "password", password);
}
password_secret = qemu_opt_get(opts, "password-secret");
if (password_secret) {
qdict_set_default_str(options, "password-secret", password_secret);
}
initiator_name = qemu_opt_get(opts, "initiator-name");
if (initiator_name) {
qdict_set_default_str(options, "initiator-name", initiator_name);
}
header_digest = qemu_opt_get(opts, "header-digest");
if (header_digest) {
/* -iscsi takes upper case values, but QAPI only supports lower case
* enum constant names, so we have to convert here. */
char *qapi_value = g_ascii_strdown(header_digest, -1);
qdict_set_default_str(options, "header-digest", qapi_value);
g_free(qapi_value);
}
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
qdict_set_default_str(options, "timeout", timeout);
}
}
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
*/
static void iscsi_parse_filename(const char *filename, QDict *options,
Error **errp)
{
struct iscsi_url *iscsi_url;
const char *transport_name;
char *lun_str;
iscsi_url = iscsi_parse_full_url(NULL, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
return;
}
#if LIBISCSI_API_VERSION >= (20160603)
switch (iscsi_url->transport) {
case TCP_TRANSPORT:
transport_name = "tcp";
break;
case ISER_TRANSPORT:
transport_name = "iser";
break;
default:
error_setg(errp, "Unknown transport type (%d)",
iscsi_url->transport);
return;
}
#else
transport_name = "tcp";
#endif
qdict_set_default_str(options, "transport", transport_name);
qdict_set_default_str(options, "portal", iscsi_url->portal);
qdict_set_default_str(options, "target", iscsi_url->target);
lun_str = g_strdup_printf("%d", iscsi_url->lun);
qdict_set_default_str(options, "lun", lun_str);
g_free(lun_str);
/* User/password from -iscsi take precedence over those from the URL */
iscsi_parse_iscsi_option(iscsi_url->target, options);
if (iscsi_url->user[0] != '\0') {
qdict_set_default_str(options, "user", iscsi_url->user);
qdict_set_default_str(options, "password", iscsi_url->passwd);
}
iscsi_destroy_url(iscsi_url);
}
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "transport",
.type = QEMU_OPT_STRING,
},
{
.name = "portal",
.type = QEMU_OPT_STRING,
},
{
.name = "target",
.type = QEMU_OPT_STRING,
},
{
.name = "user",
.type = QEMU_OPT_STRING,
},
{
.name = "password",
.type = QEMU_OPT_STRING,
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
},
{
.name = "lun",
.type = QEMU_OPT_NUMBER,
},
{
.name = "initiator-name",
.type = QEMU_OPT_STRING,
},
{
.name = "header-digest",
.type = QEMU_OPT_STRING,
},
{
.name = "timeout",
.type = QEMU_OPT_NUMBER,
},
{ /* end of list */ }
},
};
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = NULL;
struct iscsi_url *iscsi_url = NULL;
struct scsi_task *task = NULL;
struct scsi_inquiry_standard *inq = NULL;
struct scsi_inquiry_supported_pages *inq_vpd;
char *initiator_name = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int i, ret = 0, timeout = 0;
const char *transport_name, *portal, *target;
#if LIBISCSI_API_VERSION >= (20160603)
enum iscsi_transport_type transport;
#endif
int i, ret = 0, timeout = 0, lun;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1632,18 +1715,34 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
filename = qemu_opt_get(opts, "filename");
transport_name = qemu_opt_get(opts, "transport");
portal = qemu_opt_get(opts, "portal");
target = qemu_opt_get(opts, "target");
lun = qemu_opt_get_number(opts, "lun", 0);
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
if (!transport_name || !portal || !target) {
error_setg(errp, "Need all of transport, portal and target options");
ret = -EINVAL;
goto out;
}
if (!strcmp(transport_name, "tcp")) {
#if LIBISCSI_API_VERSION >= (20160603)
transport = TCP_TRANSPORT;
} else if (!strcmp(transport_name, "iser")) {
transport = ISER_TRANSPORT;
#else
/* TCP is what older libiscsi versions always use */
#endif
} else {
error_setg(errp, "Unknown transport: %s", transport_name);
ret = -EINVAL;
goto out;
}
memset(iscsilun, 0, sizeof(IscsiLun));
initiator_name = parse_initiator_name(iscsi_url->target);
initiator_name = get_initiator_name(opts);
iscsi = iscsi_create_context(initiator_name);
if (iscsi == NULL) {
@@ -1652,30 +1751,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
#if LIBISCSI_API_VERSION >= (20160603)
if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
if (iscsi_init_transport(iscsi, transport)) {
error_setg(errp, ("Error initializing transport."));
ret = -EINVAL;
goto out;
}
#endif
if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
if (iscsi_set_targetname(iscsi, target)) {
error_setg(errp, "iSCSI: Failed to set target name.");
ret = -EINVAL;
goto out;
}
if (iscsi_url->user[0] != '\0') {
ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
iscsi_url->passwd);
if (ret != 0) {
error_setg(errp, "Failed to set initiator username and password");
ret = -EINVAL;
goto out;
}
}
/* check if we got CHAP username/password via the options */
parse_chap(iscsi, iscsi_url->target, &local_err);
apply_chap(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1688,10 +1777,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
/* check if we got HEADER_DIGEST via the options */
parse_header_digest(iscsi, iscsi_url->target, &local_err);
apply_header_digest(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1699,7 +1786,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
/* timeout handling is broken in libiscsi before 1.15.0 */
timeout = parse_timeout(iscsi_url->target);
timeout = qemu_opt_get_number(opts, "timeout", 0);
#if LIBISCSI_API_VERSION >= 20150621
iscsi_set_timeout(iscsi, timeout);
#else
@@ -1708,7 +1795,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
#endif
if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
iscsi_get_error(iscsi));
ret = -EINVAL;
@@ -1717,7 +1804,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
iscsilun->iscsi = iscsi;
iscsilun->aio_context = bdrv_get_aio_context(bs);
iscsilun->lun = iscsi_url->lun;
iscsilun->lun = lun;
iscsilun->has_write_same = true;
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -1820,9 +1907,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
out:
qemu_opts_del(opts);
g_free(initiator_name);
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
if (task != NULL) {
scsi_free_scsi_task(task);
}
@@ -2031,15 +2115,15 @@ static BlockDriver bdrv_iscsi = {
.format_name = "iscsi",
.protocol_name = "iscsi",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
@@ -2066,15 +2150,15 @@ static BlockDriver bdrv_iser = {
.format_name = "iser",
.protocol_name = "iser",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,

View File

@@ -54,10 +54,10 @@ struct LinuxAioState {
io_context_t ctx;
EventNotifier e;
/* io queue for submit at batch */
/* io queue for submit at batch. Protected by AioContext lock. */
LaioQueue io_q;
/* I/O completion processing */
/* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
int event_idx;
int event_max;
@@ -100,7 +100,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
* that!
*/
if (!qemu_coroutine_entered(laiocb->co)) {
qemu_coroutine_enter(laiocb->co);
aio_co_wake(laiocb->co);
}
} else {
laiocb->common.cb(laiocb->common.opaque, ret);
@@ -234,9 +234,12 @@ static void qemu_laio_process_completions(LinuxAioState *s)
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
qemu_laio_process_completions(s);
aio_context_acquire(s->aio_context);
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
aio_context_release(s->aio_context);
}
static void qemu_laio_completion_bh(void *opaque)
@@ -455,6 +458,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
{
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
qemu_bh_delete(s->completion_bh);
s->aio_context = NULL;
}
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)


@@ -69,6 +69,7 @@ typedef struct MirrorBlockJob {
bool waiting_for_io;
int target_cluster_sectors;
int max_iov;
bool initial_zeroing_ongoing;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -117,9 +118,10 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
if (!s->initial_zeroing_ongoing) {
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
}
}
qemu_iovec_destroy(&op->qiov);
g_free(op);
@@ -132,6 +134,8 @@ static void mirror_write_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -142,12 +146,15 @@ static void mirror_write_complete(void *opaque, int ret)
}
}
mirror_iteration_done(op, ret);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static void mirror_read_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -158,10 +165,11 @@ static void mirror_read_complete(void *opaque, int ret)
}
mirror_iteration_done(op, ret);
return;
} else {
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
}
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -566,6 +574,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0;
}
s->initial_zeroing_ongoing = true;
for (sector_num = 0; sector_num < end; ) {
int nb_sectors = MIN(end - sector_num,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
@@ -573,6 +582,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
s->initial_zeroing_ongoing = false;
return 0;
}
@@ -587,6 +597,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
}
mirror_wait_for_all_io(s);
s->initial_zeroing_ongoing = false;
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -651,7 +662,28 @@ static void coroutine_fn mirror_run(void *opaque)
if (s->bdev_length < 0) {
ret = s->bdev_length;
goto immediate_exit;
} else if (s->bdev_length == 0) {
}
/* Active commit must resize the base image if its size differs from the
* active layer. */
if (s->base == blk_bs(s->target)) {
int64_t base_length;
base_length = blk_getlength(s->target);
if (base_length < 0) {
ret = base_length;
goto immediate_exit;
}
if (s->bdev_length > base_length) {
ret = blk_truncate(s->target, s->bdev_length);
if (ret < 0) {
goto immediate_exit;
}
}
}
if (s->bdev_length == 0) {
/* Report BLOCK_JOB_READY and wait for complete. */
block_job_event_ready(&s->common);
s->synced = true;
@@ -1052,9 +1084,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque, Error **errp,
bool auto_complete)
{
int64_t length, base_length;
int orig_base_flags;
int ret;
Error *local_err = NULL;
orig_base_flags = bdrv_get_flags(base);
@@ -1063,31 +1093,6 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
return;
}
length = bdrv_getlength(bs);
if (length < 0) {
error_setg_errno(errp, -length,
"Unable to determine length of %s", bs->filename);
goto error_restore_flags;
}
base_length = bdrv_getlength(base);
if (base_length < 0) {
error_setg_errno(errp, -base_length,
"Unable to determine length of %s", base->filename);
goto error_restore_flags;
}
if (length > base_length) {
ret = bdrv_truncate(base, length);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Top image %s is larger than base image %s, and "
"resize of base image failed",
bs->filename, base->filename);
goto error_restore_flags;
}
}
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque, &local_err,
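
The hunks above move the base-image resize for active commit out of commit_active_start() and into mirror_run(), where it runs in the job coroutine using blk_getlength() and blk_truncate(). As a rough standalone sketch of just the size decision (hypothetical names, not QEMU code; negative values stand for -errno as in the block layer):

#include <stdint.h>
#include <stdio.h>

/* Return the size the base must be grown to, 0 if no resize is needed,
 * or a negative value to propagate an error from either length query. */
static int64_t required_base_size(int64_t top_len, int64_t base_len)
{
    if (top_len < 0) {
        return top_len;
    }
    if (base_len < 0) {
        return base_len;
    }
    return top_len > base_len ? top_len : 0;
}

int main(void)
{
    printf("%lld\n", (long long)required_base_size(1 << 20, 512 << 10)); /* 1048576 */
    printf("%lld\n", (long long)required_base_size(512 << 10, 1 << 20)); /* 0 */
    return 0;
}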


@@ -33,8 +33,9 @@
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
@@ -42,6 +43,7 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
qemu_coroutine_enter(s->recv_coroutine[i]);
}
}
BDRV_POLL_WHILE(bs, s->read_reply_co);
}
static void nbd_teardown_connection(BlockDriverState *bs)
@@ -56,7 +58,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
qio_channel_shutdown(client->ioc,
QIO_CHANNEL_SHUTDOWN_BOTH,
NULL);
nbd_recv_coroutines_enter_all(client);
nbd_recv_coroutines_enter_all(bs);
nbd_client_detach_aio_context(bs);
object_unref(OBJECT(client->sioc));
@@ -65,54 +67,43 @@ static void nbd_teardown_connection(BlockDriverState *bs)
client->ioc = NULL;
}
static void nbd_reply_ready(void *opaque)
static coroutine_fn void nbd_read_reply_entry(void *opaque)
{
BlockDriverState *bs = opaque;
NBDClientSession *s = nbd_get_client_session(bs);
NBDClientSession *s = opaque;
uint64_t i;
int ret;
if (!s->ioc) { /* Already closed */
return;
}
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
for (;;) {
assert(s->reply.handle == 0);
ret = nbd_receive_reply(s->ioc, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
break;
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes.
*/
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
break;
}
/* We're woken up by the recv_coroutine itself. Note that there
* is no race between yielding and reentering read_reply_co. This
* is because:
*
* - if recv_coroutine[i] runs on the same AioContext, it is only
* entered after we yield
*
* - if recv_coroutine[i] runs on a different AioContext, reentering
* read_reply_co happens through a bottom half, which can only
* run after we yield.
*/
aio_co_wake(s->recv_coroutine[i]);
qemu_coroutine_yield();
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i]);
return;
}
fail:
nbd_teardown_connection(bs);
}
static void nbd_restart_write(void *opaque)
{
BlockDriverState *bs = opaque;
qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
s->read_reply_co = NULL;
}
static int nbd_co_send_request(BlockDriverState *bs,
@@ -120,7 +111,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
QEMUIOVector *qiov)
{
NBDClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
int rc, ret, i;
qemu_co_mutex_lock(&s->send_mutex);
@@ -141,11 +131,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
return -EPIPE;
}
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, nbd_restart_write, NULL, bs);
if (qiov) {
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
@@ -160,9 +145,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
} else {
rc = nbd_send_request(s->ioc, request);
}
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, NULL, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
@@ -174,8 +156,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
/* Wait until we're woken up by nbd_read_reply_entry. */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle ||
@@ -201,7 +182,7 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight == MAX_NBD_REQUESTS) {
qemu_co_queue_wait(&s->free_sema);
qemu_co_queue_wait(&s->free_sema, NULL);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
@@ -209,13 +190,19 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
}
static void nbd_coroutine_end(NBDClientSession *s,
static void nbd_coroutine_end(BlockDriverState *bs,
NBDRequest *request)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_queue_next(&s->free_sema);
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
/* Kick the read_reply_co to get the next reply. */
if (s->read_reply_co) {
aio_co_wake(s->read_reply_co);
}
}
@@ -241,7 +228,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -271,7 +258,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -306,7 +293,7 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -331,7 +318,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -357,23 +344,23 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
nbd_get_client_session(bs)->sioc->fd,
false, NULL, NULL, NULL, NULL);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
false, nbd_reply_ready, NULL, NULL, bs);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
aio_co_schedule(new_context, client->read_reply_co);
}
void nbd_client_close(BlockDriverState *bs)
@@ -434,7 +421,7 @@ int nbd_client_init(BlockDriverState *bs,
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");


@@ -25,7 +25,7 @@ typedef struct NBDClientSession {
CoMutex send_mutex;
CoQueue free_sema;
Coroutine *send_coroutine;
Coroutine *read_reply_co;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];


@@ -537,8 +537,6 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
visit_complete(ov, &saddr_qdict);
visit_free(ov);
assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
qdict_put_obj(opts, "server", saddr_qdict);
if (s->export) {


@@ -108,12 +108,13 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
qdict_put(options, "path", qstring_from_str(uri->path));
for (i = 0; i < qp->n; i++) {
unsigned long long val;
if (!qp->p[i].value) {
error_setg(errp, "Value for NFS parameter expected: %s",
qp->p[i].name);
goto out;
}
if (parse_uint_full(qp->p[i].value, NULL, 0)) {
if (parse_uint_full(qp->p[i].value, &val, 0)) {
error_setg(errp, "Illegal value for NFS parameter: %s",
qp->p[i].name);
goto out;
@@ -207,15 +208,21 @@ static void nfs_set_events(NFSClient *client)
static void nfs_process_read(void *arg)
{
NFSClient *client = arg;
aio_context_acquire(client->aio_context);
nfs_service(client->context, POLLIN);
nfs_set_events(client);
aio_context_release(client->aio_context);
}
static void nfs_process_write(void *arg)
{
NFSClient *client = arg;
aio_context_acquire(client->aio_context);
nfs_service(client->context, POLLOUT);
nfs_set_events(client);
aio_context_release(client->aio_context);
}
static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -230,8 +237,9 @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
static void nfs_co_generic_bh_cb(void *opaque)
{
NFSRPC *task = opaque;
task->complete = 1;
qemu_coroutine_enter(task->co);
aio_co_wake(task->co);
}
static void
@@ -255,9 +263,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
nfs_co_generic_bh_cb, task);
}
static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *iov,
int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
@@ -266,9 +274,7 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
task.iov = iov;
if (nfs_pread_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
nfs_co_generic_cb, &task) != 0) {
offset, bytes, nfs_co_generic_cb, &task) != 0) {
return -ENOMEM;
}
@@ -289,28 +295,34 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
return 0;
}
static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *iov,
int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
char *buf = NULL;
bool my_buffer = false;
nfs_co_init_task(bs, &task);
buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
if (nb_sectors && buf == NULL) {
return -ENOMEM;
if (iov->niov != 1) {
buf = g_try_malloc(bytes);
if (bytes && buf == NULL) {
return -ENOMEM;
}
qemu_iovec_to_buf(iov, 0, buf, bytes);
my_buffer = true;
} else {
buf = iov->iov[0].iov_base;
}
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
if (nfs_pwrite_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
buf, nfs_co_generic_cb, &task) != 0) {
g_free(buf);
offset, bytes, buf,
nfs_co_generic_cb, &task) != 0) {
if (my_buffer) {
g_free(buf);
}
return -ENOMEM;
}
@@ -319,9 +331,11 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
qemu_coroutine_yield();
}
g_free(buf);
if (my_buffer) {
g_free(buf);
}
if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
if (task.ret != bytes) {
return task.ret < 0 ? task.ret : -EIO;
}
@@ -358,27 +372,27 @@ static QemuOptsList runtime_opts = {
.help = "Path of the image on the host",
},
{
.name = "uid",
.name = "user",
.type = QEMU_OPT_NUMBER,
.help = "UID value to use when talking to the server",
},
{
.name = "gid",
.name = "group",
.type = QEMU_OPT_NUMBER,
.help = "GID value to use when talking to the server",
},
{
.name = "tcp-syncnt",
.name = "tcp-syn-count",
.type = QEMU_OPT_NUMBER,
.help = "Number of SYNs to send during the session establish",
},
{
.name = "readahead",
.name = "readahead-size",
.type = QEMU_OPT_NUMBER,
.help = "Set the readahead size in bytes",
},
{
.name = "pagecache",
.name = "page-cache-size",
.type = QEMU_OPT_NUMBER,
.help = "Set the pagecache size in bytes",
},
@@ -507,29 +521,29 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
goto fail;
}
if (qemu_opt_get(opts, "uid")) {
client->uid = qemu_opt_get_number(opts, "uid", 0);
if (qemu_opt_get(opts, "user")) {
client->uid = qemu_opt_get_number(opts, "user", 0);
nfs_set_uid(client->context, client->uid);
}
if (qemu_opt_get(opts, "gid")) {
client->gid = qemu_opt_get_number(opts, "gid", 0);
if (qemu_opt_get(opts, "group")) {
client->gid = qemu_opt_get_number(opts, "group", 0);
nfs_set_gid(client->context, client->gid);
}
if (qemu_opt_get(opts, "tcp-syncnt")) {
client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0);
if (qemu_opt_get(opts, "tcp-syn-count")) {
client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syn-count", 0);
nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
}
#ifdef LIBNFS_FEATURE_READAHEAD
if (qemu_opt_get(opts, "readahead")) {
if (qemu_opt_get(opts, "readahead-size")) {
if (open_flags & BDRV_O_NOCACHE) {
error_setg(errp, "Cannot enable NFS readahead "
"if cache.direct = on");
goto fail;
}
client->readahead = qemu_opt_get_number(opts, "readahead", 0);
client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
error_report("NFS Warning: Truncating NFS readahead "
"size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
@@ -544,13 +558,13 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
#endif
#ifdef LIBNFS_FEATURE_PAGECACHE
if (qemu_opt_get(opts, "pagecache")) {
if (qemu_opt_get(opts, "page-cache-size")) {
if (open_flags & BDRV_O_NOCACHE) {
error_setg(errp, "Cannot enable NFS pagecache "
"if cache.direct = on");
goto fail;
}
client->pagecache = qemu_opt_get_number(opts, "pagecache", 0);
client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
error_report("NFS Warning: Truncating NFS pagecache "
"size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
@@ -797,28 +811,26 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
ov = qobject_output_visitor_new(&server_qdict);
visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
visit_complete(ov, &server_qdict);
assert(qobject_type(server_qdict) == QTYPE_QDICT);
qdict_put_obj(opts, "server", server_qdict);
qdict_put(opts, "path", qstring_from_str(client->path));
if (client->uid) {
qdict_put(opts, "uid", qint_from_int(client->uid));
qdict_put(opts, "user", qint_from_int(client->uid));
}
if (client->gid) {
qdict_put(opts, "gid", qint_from_int(client->gid));
qdict_put(opts, "group", qint_from_int(client->gid));
}
if (client->tcp_syncnt) {
qdict_put(opts, "tcp-syncnt",
qint_from_int(client->tcp_syncnt));
qdict_put(opts, "tcp-syn-cnt",
qint_from_int(client->tcp_syncnt));
}
if (client->readahead) {
qdict_put(opts, "readahead",
qint_from_int(client->readahead));
qdict_put(opts, "readahead-size",
qint_from_int(client->readahead));
}
if (client->pagecache) {
qdict_put(opts, "pagecache",
qint_from_int(client->pagecache));
qdict_put(opts, "page-cache-size",
qint_from_int(client->pagecache));
}
if (client->debug) {
qdict_put(opts, "debug", qint_from_int(client->debug));
@@ -855,8 +867,8 @@ static BlockDriver bdrv_nfs = {
.bdrv_create = nfs_file_create,
.bdrv_reopen_prepare = nfs_reopen_prepare,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
.bdrv_co_preadv = nfs_co_preadv,
.bdrv_co_pwritev = nfs_co_pwritev,
.bdrv_co_flush_to_disk = nfs_co_flush,
.bdrv_detach_aio_context = nfs_detach_aio_context,
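
The nfs_co_preadv/nfs_co_pwritev conversion above switches the driver to byte-based offsets and, on the write side, only allocates a bounce buffer when the request has more than one iovec element. A simplified standalone sketch of that fast-path decision follows; write_fn() is a stand-in for nfs_pwrite_async() and all names are hypothetical:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

/* write_fn() is a placeholder for the real asynchronous write call. */
static void write_fn(const void *buf, size_t len)
{
    printf("writing %zu bytes\n", len);
    (void)buf;
}

static int write_iov(const struct iovec *iov, int niov, size_t bytes)
{
    if (niov == 1) {
        /* Single element: hand the caller's buffer over directly. */
        write_fn(iov[0].iov_base, bytes);
        return 0;
    }

    /* Multiple elements: flatten into a temporary bounce buffer. */
    char *bounce = malloc(bytes);
    if (bytes && !bounce) {
        return -1; /* the driver returns -ENOMEM here */
    }
    size_t off = 0;
    for (int i = 0; i < niov && off < bytes; i++) {
        size_t n = iov[i].iov_len;
        if (n > bytes - off) {
            n = bytes - off;
        }
        memcpy(bounce + off, iov[i].iov_base, n);
        off += n;
    }
    write_fn(bounce, bytes);
    free(bounce);
    return 0;
}

int main(void)
{
    char a[] = "hello ", b[] = "world";
    struct iovec v[2] = { { a, 6 }, { b, 5 } };
    return write_iov(v, 2, 11);
}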


@@ -215,7 +215,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
s->data_end << BDRV_SECTOR_BITS,
space << BDRV_SECTOR_BITS, 0);
} else {
ret = bdrv_truncate(bs->file->bs,
ret = bdrv_truncate(bs->file,
(s->data_end + space) << BDRV_SECTOR_BITS);
}
if (ret < 0) {
@@ -449,7 +449,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
size - res->image_end_offset);
res->leaks += count;
if (fix & BDRV_FIX_LEAKS) {
ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
ret = bdrv_truncate(bs->file, res->image_end_offset);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -581,6 +581,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
char *buf;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
if (ret < 0) {
goto fail;
@@ -681,7 +687,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
goto fail_options;
}
if (!bdrv_has_zero_init(bs->file->bs) ||
bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) {
s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
}
@@ -724,7 +730,7 @@ static void parallels_close(BlockDriverState *bs)
}
if (bs->open_flags & BDRV_O_RDWR) {
bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
}
g_free(s->bat_dirty_bmap);


@@ -237,8 +237,8 @@ void bdrv_query_image_info(BlockDriverState *bs,
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Can't get size of device '%s'",
bdrv_get_device_name(bs));
error_setg_errno(errp, -size, "Can't get image size '%s'",
bs->exact_filename);
goto out;
}
@@ -357,10 +357,6 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
qapi_free_BlockInfo(info);
}
static BlockStats *bdrv_query_stats(BlockBackend *blk,
const BlockDriverState *bs,
bool query_backing);
static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
{
BlockAcctStats *stats = blk_get_stats(blk);
@@ -428,9 +424,18 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
}
}
static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
bool query_backing)
{
BlockStats *s = NULL;
s = g_malloc0(sizeof(*s));
s->stats = g_malloc0(sizeof(*s->stats));
if (!bs) {
return s;
}
if (bdrv_get_node_name(bs)[0]) {
s->has_node_name = true;
s->node_name = g_strdup(bdrv_get_node_name(bs));
@@ -440,32 +445,12 @@ static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
if (bs->file) {
s->has_parent = true;
s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
s->parent = bdrv_query_bds_stats(bs->file->bs, query_backing);
}
if (query_backing && bs->backing) {
s->has_backing = true;
s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
}
}
static BlockStats *bdrv_query_stats(BlockBackend *blk,
const BlockDriverState *bs,
bool query_backing)
{
BlockStats *s;
s = g_malloc0(sizeof(*s));
s->stats = g_malloc0(sizeof(*s->stats));
if (blk) {
s->has_device = true;
s->device = g_strdup(blk_name(blk));
bdrv_query_blk_stats(s->stats, blk);
}
if (bs) {
bdrv_query_bds_stats(s, bs, query_backing);
s->backing = bdrv_query_bds_stats(bs->backing->bs, query_backing);
}
return s;
@@ -494,42 +479,44 @@ BlockInfoList *qmp_query_block(Error **errp)
return head;
}
static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
bool query_nodes)
{
if (query_nodes) {
*bs = bdrv_next_node(*bs);
return !!*bs;
}
*blk = blk_next(*blk);
*bs = *blk ? blk_bs(*blk) : NULL;
return !!*blk;
}
BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
bool query_nodes,
Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
BlockBackend *blk;
BlockDriverState *bs;
/* Just to be safe if query_nodes is not always initialized */
query_nodes = has_query_nodes && query_nodes;
if (has_query_nodes && query_nodes) {
for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
AioContext *ctx = bdrv_get_aio_context(bs);
while (next_query_bds(&blk, &bs, query_nodes)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
AioContext *ctx = blk ? blk_get_aio_context(blk)
: bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
info->value = bdrv_query_bds_stats(bs, false);
aio_context_release(ctx);
aio_context_acquire(ctx);
info->value = bdrv_query_stats(blk, bs, !query_nodes);
aio_context_release(ctx);
*p_next = info;
p_next = &info->next;
}
} else {
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
AioContext *ctx = blk_get_aio_context(blk);
BlockStats *s;
*p_next = info;
p_next = &info->next;
aio_context_acquire(ctx);
s = bdrv_query_bds_stats(blk_bs(blk), true);
s->has_device = true;
s->device = g_strdup(blk_name(blk));
bdrv_query_blk_stats(s->stats, blk);
aio_context_release(ctx);
info->value = s;
*p_next = info;
p_next = &info->next;
}
}
return head;
@@ -695,7 +682,6 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
visit_complete(v, &obj);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
qobject_decref(obj);


@@ -106,6 +106,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
QCowHeader header;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
goto fail;
@@ -467,7 +473,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (bs->encrypted &&
@@ -909,7 +915,7 @@ static int qcow_make_empty(BlockDriverState *bs)
if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;


@@ -932,9 +932,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
if (bytes == 0) {
/* Wait for the dependency to complete. We need to recheck
* the free/allocated clusters when we continue. */
qemu_co_mutex_unlock(&s->lock);
qemu_co_queue_wait(&old_alloc->dependent_requests);
qemu_co_mutex_lock(&s->lock);
qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
return -EAGAIN;
}
}
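
This hunk (like the matching ones in the sheepdog and throttle-group hunks further down) uses the new two-argument qemu_co_queue_wait(), which drops the given CoMutex while the coroutine sleeps and re-acquires it on wakeup, replacing the manual unlock/wait/lock sequence. POSIX condition variables offer the same contract; the pthread sketch below is only an analogy, not QEMU's coroutine code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int ready;

static void *waiter(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    while (!ready) {
        /* atomically releases `lock` while sleeping, re-takes it on return */
        pthread_cond_wait(&cond, &lock);
    }
    puts("dependency finished; rechecking state under the lock");
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, waiter, NULL);
    pthread_mutex_lock(&lock);
    ready = 1;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);
    return 0;
}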


@@ -83,6 +83,16 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
/*********************************************************/
/* refcount handling */
static void update_max_refcount_table_index(BDRVQcow2State *s)
{
unsigned i = s->refcount_table_size - 1;
while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
i--;
}
/* Set s->max_refcount_table_index to the index of the last used entry */
s->max_refcount_table_index = i;
}
int qcow2_refcount_init(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
@@ -111,6 +121,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
}
for(i = 0; i < s->refcount_table_size; i++)
be64_to_cpus(&s->refcount_table[i]);
update_max_refcount_table_index(s);
}
return 0;
fail:
@@ -439,6 +450,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
s->refcount_table[refcount_table_index] = new_block;
/* If there's a hole in s->refcount_table then it can happen
* that refcount_table_index < s->max_refcount_table_index */
s->max_refcount_table_index =
MAX(s->max_refcount_table_index, refcount_table_index);
/* The new refcount block may be where the caller intended to put its
* data, so let it restart the search. */
@@ -580,6 +595,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table = new_table;
s->refcount_table_size = table_size;
s->refcount_table_offset = table_offset;
update_max_refcount_table_index(s);
/* Free old table. */
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
@@ -1718,7 +1734,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
goto resize_fail;
}
ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
ret = bdrv_truncate(bs->file, offset + s->cluster_size);
if (ret < 0) {
goto resize_fail;
}
@@ -2171,6 +2187,7 @@ write_refblocks:
s->refcount_table = on_disk_reftable;
s->refcount_table_offset = reftable_offset;
s->refcount_table_size = reftable_size;
update_max_refcount_table_index(s);
return 0;
@@ -2383,7 +2400,11 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
}
if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
for (i = 0; i < s->refcount_table_size; i++) {
unsigned last_entry = s->max_refcount_table_index;
assert(last_entry < s->refcount_table_size);
assert(last_entry + 1 == s->refcount_table_size ||
(s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
for (i = 0; i <= last_entry; i++) {
if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
s->cluster_size)) {
@@ -2871,6 +2892,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
/* Now update the rest of the in-memory information */
old_reftable = s->refcount_table;
s->refcount_table = new_reftable;
update_max_refcount_table_index(s);
s->refcount_bits = 1 << refcount_order;
s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
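
update_max_refcount_table_index() above records the index of the last refcount-table entry with a non-zero offset, so qcow2_check_metadata_overlap() can stop scanning at that entry instead of walking the whole table. A standalone version of the same backwards scan, with a made-up mask value and hypothetical names:

#include <stdint.h>
#include <stdio.h>

#define OFFSET_MASK 0xfffffffffffffe00ULL /* stands in for REFT_OFFSET_MASK */

/* Index of the last entry whose offset field is non-zero (0 if none). */
static unsigned last_used_index(const uint64_t *table, unsigned size)
{
    unsigned i = size - 1;
    while (i > 0 && (table[i] & OFFSET_MASK) == 0) {
        i--;
    }
    return i;
}

int main(void)
{
    uint64_t table[8] = { 0x10000, 0x20000, 0, 0x30000, 0, 0, 0, 0 };
    printf("last used entry: %u\n", last_used_index(table, 8)); /* prints 3 */
    return 0;
}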


@@ -814,8 +814,8 @@ static int qcow2_update_options(BlockDriverState *bs, QDict *options,
return ret;
}
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQcow2State *s = bs->opaque;
unsigned int len, i;
@@ -1205,6 +1205,18 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
return qcow2_do_open(bs, options, flags, errp);
}
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
@@ -1785,7 +1797,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
options = qdict_clone_shallow(bs->options);
flags &= ~BDRV_O_INACTIVE;
ret = qcow2_open(bs, options, flags, &local_err);
ret = qcow2_do_open(bs, options, flags, &local_err);
QDECREF(options);
if (local_err) {
error_propagate(errp, local_err);
@@ -2570,7 +2582,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file->bs);
return bdrv_truncate(bs->file->bs, cluster_offset);
return bdrv_truncate(bs->file, cluster_offset);
}
buf = qemu_blockalign(bs, s->cluster_size);
@@ -2743,6 +2755,7 @@ static int make_completely_empty(BlockDriverState *bs)
s->refcount_table_offset = s->cluster_size;
s->refcount_table_size = s->cluster_size / sizeof(uint64_t);
s->max_refcount_table_index = 0;
g_free(s->refcount_table);
s->refcount_table = new_reftable;
@@ -2783,7 +2796,7 @@ static int make_completely_empty(BlockDriverState *bs)
goto fail;
}
ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -3249,7 +3262,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (new_size) {
ret = bdrv_truncate(bs, new_size);
BlockBackend *blk = blk_new();
blk_insert_bs(blk, bs);
ret = blk_truncate(blk, new_size);
blk_unref(blk);
if (ret < 0) {
return ret;
}


@@ -251,6 +251,7 @@ typedef struct BDRVQcow2State {
uint64_t *refcount_table;
uint64_t refcount_table_offset;
uint32_t refcount_table_size;
uint32_t max_refcount_table_index; /* Last used entry in refcount_table */
uint64_t free_cluster_index;
uint64_t free_byte_offset;


@@ -83,6 +83,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
unsigned int index;
unsigned int n;
qed_acquire(s);
if (ret) {
goto out;
}
@@ -109,6 +110,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
out:
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
qed_release(s);
g_free(find_cluster_cb);
}


@@ -31,6 +31,7 @@ static void qed_read_table_cb(void *opaque, int ret)
{
QEDReadTableCB *read_table_cb = opaque;
QEDTable *table = read_table_cb->table;
BDRVQEDState *s = read_table_cb->s;
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
int i;
@@ -40,13 +41,15 @@ static void qed_read_table_cb(void *opaque, int ret)
}
/* Byteswap offsets */
qed_acquire(s);
for (i = 0; i < noffsets; i++) {
table->offsets[i] = le64_to_cpu(table->offsets[i]);
}
qed_release(s);
out:
/* Completion */
trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
trace_qed_read_table_cb(s, read_table_cb->table, ret);
gencb_complete(&read_table_cb->gencb, ret);
}
@@ -84,8 +87,9 @@ typedef struct {
static void qed_write_table_cb(void *opaque, int ret)
{
QEDWriteTableCB *write_table_cb = opaque;
BDRVQEDState *s = write_table_cb->s;
trace_qed_write_table_cb(write_table_cb->s,
trace_qed_write_table_cb(s,
write_table_cb->orig_table,
write_table_cb->flush,
ret);
@@ -97,8 +101,10 @@ static void qed_write_table_cb(void *opaque, int ret)
if (write_table_cb->flush) {
/* We still need to flush first */
write_table_cb->flush = false;
qed_acquire(s);
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
write_table_cb);
qed_release(s);
return;
}
@@ -213,6 +219,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
CachedL2Table *l2_table = request->l2_table;
uint64_t l2_offset = read_l2_table_cb->l2_offset;
qed_acquire(s);
if (ret) {
/* can't trust loaded L2 table anymore */
qed_unref_l2_cache_entry(l2_table);
@@ -228,6 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(request->l2_table != NULL);
}
qed_release(s);
gencb_complete(&read_l2_table_cb->gencb, ret);
}


@@ -273,7 +273,19 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
return l2_table;
}
static void qed_aio_next_io(void *opaque, int ret);
static void qed_aio_next_io(QEDAIOCB *acb, int ret);
static void qed_aio_start_io(QEDAIOCB *acb)
{
qed_aio_next_io(acb, 0);
}
static void qed_aio_next_io_cb(void *opaque, int ret)
{
QEDAIOCB *acb = opaque;
qed_aio_next_io(acb, ret);
}
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
@@ -292,7 +304,7 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
}
}
@@ -333,10 +345,22 @@ static void qed_need_check_timer_cb(void *opaque)
trace_qed_need_check_timer_cb(s);
qed_acquire(s);
qed_plug_allocating_write_reqs(s);
/* Ensure writes are on disk before clearing flag */
bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
qed_release(s);
}
void qed_acquire(BDRVQEDState *s)
{
aio_context_acquire(bdrv_get_aio_context(s->bs));
}
void qed_release(BDRVQEDState *s)
{
aio_context_release(bdrv_get_aio_context(s->bs));
}
static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -391,8 +415,8 @@ static void bdrv_qed_drain(BlockDriverState *bs)
}
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQEDState *s = bs->opaque;
QEDHeader le_header;
@@ -526,6 +550,18 @@ out:
return ret;
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
return bdrv_qed_do_open(bs, options, flags, errp);
}
static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQEDState *s = bs->opaque;
@@ -721,7 +757,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
}
if (cb->co) {
qemu_coroutine_enter(cb->co);
aio_co_wake(cb->co);
}
}
@@ -918,6 +954,7 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
static void qed_aio_complete_bh(void *opaque)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
BlockCompletionFunc *cb = acb->common.cb;
void *user_opaque = acb->common.opaque;
int ret = acb->bh_ret;
@@ -925,7 +962,9 @@ static void qed_aio_complete_bh(void *opaque)
qemu_aio_unref(acb);
/* Invoke callback */
qed_acquire(s);
cb(user_opaque, ret);
qed_release(s);
}
static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -959,7 +998,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
} else if (s->header.features & QED_F_NEED_CHECK) {
qed_start_need_check_timer(s);
}
@@ -984,7 +1023,7 @@ static void qed_commit_l2_update(void *opaque, int ret)
acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(acb->request.l2_table != NULL);
qed_aio_next_io(opaque, ret);
qed_aio_next_io(acb, ret);
}
/**
@@ -1032,11 +1071,11 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
if (need_alloc) {
/* Write out the whole new L2 table */
qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
qed_aio_write_l1_update, acb);
qed_aio_write_l1_update, acb);
} else {
/* Write out only the updated part of the L2 table */
qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
qed_aio_next_io, acb);
qed_aio_next_io_cb, acb);
}
return;
@@ -1088,7 +1127,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
next_fn = qed_aio_next_io;
next_fn = qed_aio_next_io_cb;
} else {
if (s->bs->backing) {
next_fn = qed_aio_write_flush_before_l2_update;
@@ -1201,7 +1240,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
if (acb->flags & QED_AIOCB_ZERO) {
/* Skip ahead if the clusters are already zero */
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return;
}
@@ -1321,18 +1360,18 @@ static void qed_aio_read_data(void *opaque, int ret,
/* Handle zero cluster and backing file reads */
if (ret == QED_CLUSTER_ZERO) {
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return;
} else if (ret != QED_CLUSTER_FOUND) {
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
&acb->backing_qiov, qed_aio_next_io, acb);
&acb->backing_qiov, qed_aio_next_io_cb, acb);
return;
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
qed_aio_next_io_cb, acb);
return;
err:
@@ -1342,9 +1381,8 @@ err:
/**
* Begin next I/O or complete the request
*/
static void qed_aio_next_io(void *opaque, int ret)
static void qed_aio_next_io(QEDAIOCB *acb, int ret)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
qed_aio_write_data : qed_aio_read_data;
@@ -1400,7 +1438,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
/* Start request */
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return &acb->common;
}
@@ -1436,7 +1474,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
cb->done = true;
cb->ret = ret;
if (cb->co) {
qemu_coroutine_enter(cb->co);
aio_co_wake(cb->co);
}
}
@@ -1603,7 +1641,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_prepend(errp, "Could not reopen qed layer: ");


@@ -198,6 +198,9 @@ enum {
*/
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
void qed_acquire(BDRVQEDState *s);
void qed_release(BDRVQEDState *s);
/**
* Generic callback for chaining async callbacks
*/


@@ -341,7 +341,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
s->size = offset;
offset += s->offset;
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static int raw_media_changed(BlockDriverState *bs)
@@ -384,6 +384,12 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
BDRVRawState *s = bs->opaque;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->sg = bs->file->bs->sg;
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;


@@ -62,6 +62,13 @@
#define RBD_MAX_SNAP_NAME_SIZE 128
#define RBD_MAX_SNAPS 100
/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
#ifdef LIBRBD_SUPPORTS_IOVEC
#define LIBRBD_USE_IOVEC 1
#else
#define LIBRBD_USE_IOVEC 0
#endif
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
@@ -310,6 +317,17 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
return ret;
}
static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
{
if (LIBRBD_USE_IOVEC) {
RBDAIOCB *acb = rcb->acb;
iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
acb->qiov->size - offs);
} else {
memset(rcb->buf + offs, 0, rcb->size - offs);
}
}
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
@@ -426,11 +444,11 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
}
} else {
if (r < 0) {
memset(rcb->buf, 0, rcb->size);
qemu_rbd_memset(rcb, 0);
acb->ret = r;
acb->error = 1;
} else if (r < rcb->size) {
memset(rcb->buf + r, 0, rcb->size - r);
qemu_rbd_memset(rcb, r);
if (!acb->error) {
acb->ret = rcb->size;
}
@@ -441,10 +459,13 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
g_free(rcb);
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
if (!LIBRBD_USE_IOVEC) {
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_aio_unref(acb);
@@ -655,7 +676,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
char *buf;
int r;
BDRVRBDState *s = bs->opaque;
@@ -664,27 +684,29 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->cmd = cmd;
acb->qiov = qiov;
assert(!qiov || qiov->size == size);
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_try_blockalign(bs, qiov->size);
if (acb->bounce == NULL) {
goto failed;
rcb = g_new(RADOSCB, 1);
if (!LIBRBD_USE_IOVEC) {
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_try_blockalign(bs, qiov->size);
if (acb->bounce == NULL) {
goto failed;
}
}
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
}
rcb->buf = acb->bounce;
}
acb->ret = 0;
acb->error = 0;
acb->s = s;
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
}
buf = acb->bounce;
rcb = g_new(RADOSCB, 1);
rcb->acb = acb;
rcb->buf = buf;
rcb->s = acb->s;
rcb->size = size;
r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -694,10 +716,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
switch (cmd) {
case RBD_AIO_WRITE:
r = rbd_aio_write(s->image, off, size, buf, c);
#ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
#else
r = rbd_aio_write(s->image, off, size, rcb->buf, c);
#endif
break;
case RBD_AIO_READ:
r = rbd_aio_read(s->image, off, size, buf, c);
#ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
#else
r = rbd_aio_read(s->image, off, size, rcb->buf, c);
#endif
break;
case RBD_AIO_DISCARD:
r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -712,14 +742,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
if (r < 0) {
goto failed_completion;
}
return &acb->common;
failed_completion:
rbd_aio_release(c);
failed:
g_free(rcb);
qemu_vfree(acb->bounce);
if (!LIBRBD_USE_IOVEC) {
qemu_vfree(acb->bounce);
}
qemu_aio_unref(acb);
return NULL;
}
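
LIBRBD_USE_IOVEC above turns the LIBRBD_SUPPORTS_IOVEC #ifdef into a compile-time constant, so both the iovec path and the bounce-buffer fallback are always parsed and type-checked while the unused branch is optimized away; only the librbd calls themselves remain behind #ifdef. A minimal illustration of the pattern, using a hypothetical FEATURE_FOO macro:

#include <stdio.h>

#ifdef FEATURE_FOO
#define USE_FOO 1
#else
#define USE_FOO 0
#endif

int main(void)
{
    if (USE_FOO) {
        puts("fast path: pass the caller's iovec array to the library");
    } else {
        puts("fallback: copy the request through a bounce buffer");
    }
    return 0;
}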


@@ -86,6 +86,12 @@ static int replication_open(BlockDriverState *bs, QDict *options,
const char *mode;
const char *top_id;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = -EINVAL;
opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);


@@ -486,7 +486,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
retry:
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
if (AIOCBOverlapping(acb, cb)) {
qemu_co_queue_wait(&s->overlapping_queue);
qemu_co_queue_wait(&s->overlapping_queue, NULL);
goto retry;
}
}
@@ -575,13 +575,6 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
static void restart_co_req(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co);
}
typedef struct SheepdogReqCo {
int sockfd;
BlockDriverState *bs;
@@ -592,12 +585,19 @@ typedef struct SheepdogReqCo {
unsigned int *rlen;
int ret;
bool finished;
Coroutine *co;
} SheepdogReqCo;
static void restart_co_req(void *opaque)
{
SheepdogReqCo *srco = opaque;
aio_co_wake(srco->co);
}
static coroutine_fn void do_co_req(void *opaque)
{
int ret;
Coroutine *co;
SheepdogReqCo *srco = opaque;
int sockfd = srco->sockfd;
SheepdogReq *hdr = srco->hdr;
@@ -605,9 +605,9 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *wlen = srco->wlen;
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
srco->co = qemu_coroutine_self();
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, restart_co_req, NULL, co);
NULL, restart_co_req, NULL, srco);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
@@ -615,7 +615,7 @@ static coroutine_fn void do_co_req(void *opaque)
}
aio_set_fd_handler(srco->aio_context, sockfd, false,
restart_co_req, NULL, NULL, co);
restart_co_req, NULL, NULL, srco);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -643,6 +643,7 @@ out:
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, NULL, NULL, NULL);
srco->co = NULL;
srco->ret = ret;
srco->finished = true;
if (srco->bs) {
@@ -866,7 +867,7 @@ static void coroutine_fn aio_read_response(void *opaque)
* We've finished all requests which belong to the AIOCB, so
* we can switch back to sd_co_readv/writev now.
*/
qemu_coroutine_enter(acb->coroutine);
aio_co_wake(acb->coroutine);
}
return;
@@ -883,14 +884,14 @@ static void co_read_response(void *opaque)
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
qemu_coroutine_enter(s->co_recv);
aio_co_wake(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
qemu_coroutine_enter(s->co_send);
aio_co_wake(s->co_send);
}
/*


@@ -889,10 +889,14 @@ static void restart_coroutine(void *opaque)
DPRINTF("co=%p", co);
qemu_coroutine_enter(co);
aio_co_wake(co);
}
static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -912,25 +916,10 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, rd_handler, wr_handler, NULL, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, NULL, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
set_fd_handler(s, bs);
qemu_coroutine_yield();
clear_fd_handler(s, bs);
DPRINTF("s->sock=%d - back", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
NULL, NULL, NULL, NULL);
}
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position


@@ -326,7 +326,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
@@ -416,7 +416,9 @@ static void timer_cb(BlockBackend *blk, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
aio_context_acquire(blk_get_aio_context(blk));
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
aio_context_release(blk_get_aio_context(blk));
/* If the request queue was empty then we have to take care of
* scheduling the next one */


@@ -363,6 +363,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
logout("\n");
ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);


@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
bdrv_truncate(bs->file->bs, new_file_size);
bdrv_truncate(bs->file, new_file_size);
}
}
qemu_vfree(desc_entries);


@@ -898,6 +898,12 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
uint64_t signature;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
s->bat = NULL;
s->first_visible_write = true;
@@ -1165,7 +1171,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
/* per the spec, the address for a block is in units of 1MB */
*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
return bdrv_truncate(bs->file, *new_offset + s->block_size);
}
/*


@@ -943,6 +943,12 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t magic;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
buf = vmdk_read_desc(bs->file, 0, errp);
if (!buf) {
return -EINVAL;
@@ -1361,8 +1367,8 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
goto out;
}
data->lba = offset >> BDRV_SECTOR_BITS;
data->size = buf_len;
data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS);
data->size = cpu_to_le32(buf_len);
n_bytes = buf_len + sizeof(VmdkGrainMarker);
iov = (struct iovec) {


@@ -220,6 +220,12 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {


@@ -2968,6 +2968,7 @@ static void write_target_close(BlockDriverState *bs) {
static BlockDriver vvfat_write_target = {
.format_name = "vvfat_write_target",
.instance_size = sizeof(void*),
.bdrv_co_pwritev = write_target_commit,
.bdrv_close = write_target_close,
};
@@ -3036,14 +3037,13 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
unlink(s->qcow_filename);
#endif
backing = bdrv_new();
backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR,
&error_abort);
*(void**) backing->opaque = s;
bdrv_set_backing_hd(s->bs, backing);
bdrv_unref(backing);
s->bs->backing->bs->drv = &vvfat_write_target;
s->bs->backing->bs->opaque = g_new(void *, 1);
*(void**)s->bs->backing->bs->opaque = s;
return 0;
err:


@@ -41,7 +41,7 @@ struct QEMUWin32AIOState {
HANDLE hIOCP;
EventNotifier e;
int count;
bool is_aio_context_attached;
AioContext *aio_ctx;
};
typedef struct QEMUWin32AIOCB {
@@ -87,7 +87,6 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
qemu_vfree(waiocb->buf);
}
waiocb->common.cb(waiocb->common.opaque, ret);
qemu_aio_unref(waiocb);
}
@@ -176,13 +175,13 @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
aio->is_aio_context_attached = false;
aio->aio_ctx = NULL;
}
void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
AioContext *new_context)
{
aio->is_aio_context_attached = true;
aio->aio_ctx = new_context;
aio_set_event_notifier(new_context, &aio->e, false,
win32_aio_completion_cb, NULL);
}
@@ -212,7 +211,7 @@ out_free_state:
void win32_aio_cleanup(QEMUWin32AIOState *aio)
{
assert(!aio->is_aio_context_attached);
assert(!aio->aio_ctx);
CloseHandle(aio->hIOCP);
event_notifier_cleanup(&aio->e);
g_free(aio);


@@ -227,27 +227,30 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
return NULL;
}
bool drive_check_orphaned(void)
void drive_check_orphaned(void)
{
BlockBackend *blk;
DriveInfo *dinfo;
bool rs = false;
Location loc;
bool orphans = false;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
/* If dinfo->bdrv->dev is NULL, it has no device attached. */
/* Unless this is a default drive, this may be an oversight. */
if (!blk_get_attached_dev(blk) && !dinfo->is_default &&
dinfo->type != IF_NONE) {
fprintf(stderr, "Warning: Orphaned drive without device: "
"id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
if_name[dinfo->type], dinfo->bus, dinfo->unit);
rs = true;
loc_push_none(&loc);
qemu_opts_loc_restore(dinfo->opts);
error_report("machine type does not support"
" if=%s,bus=%d,unit=%d",
if_name[dinfo->type], dinfo->bus, dinfo->unit);
loc_pop(&loc);
orphans = true;
}
}
return rs;
if (orphans) {
exit(1);
}
}
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
@@ -2855,6 +2858,7 @@ void qmp_block_resize(bool has_device, const char *device,
int64_t size, Error **errp)
{
Error *local_err = NULL;
BlockBackend *blk = NULL;
BlockDriverState *bs;
AioContext *aio_context;
int ret;
@@ -2885,10 +2889,13 @@ void qmp_block_resize(bool has_device, const char *device,
goto out;
}
blk = blk_new();
blk_insert_bs(blk, bs);
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
ret = bdrv_truncate(bs, size);
ret = blk_truncate(blk, size);
switch (ret) {
case 0:
break;
@@ -2910,6 +2917,7 @@ void qmp_block_resize(bool has_device, const char *device,
}
out:
blk_unref(blk);
aio_context_release(aio_context);
}


@@ -129,7 +129,7 @@ static int char_pty_chr_write(Chardev *chr, const uint8_t *buf, int len)
/* guest sends data, check for (re-)connect */
pty_chr_update_read_handler_locked(chr);
if (!s->connected) {
return 0;
return len;
}
}
return io_channel_send(s->ioc, buf, len);


@@ -97,6 +97,9 @@ static gboolean tcp_chr_accept(QIOChannel *chan,
GIOCondition cond,
void *opaque);
static int tcp_chr_read_poll(void *opaque);
static void tcp_chr_disconnect(Chardev *chr);
/* Called with chr_write_lock held. */
static int tcp_chr_write(Chardev *chr, const uint8_t *buf, int len)
{
@@ -114,6 +117,13 @@ static int tcp_chr_write(Chardev *chr, const uint8_t *buf, int len)
s->write_msgfds_num = 0;
}
if (ret < 0 && errno != EAGAIN) {
if (tcp_chr_read_poll(chr) <= 0) {
tcp_chr_disconnect(chr);
return len;
} /* else let the read handler finish it properly */
}
return ret;
} else {
/* XXX: indicate an error ? */


@@ -652,6 +652,7 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
if (strcmp(filename, "null") == 0 ||
strcmp(filename, "pty") == 0 ||
strcmp(filename, "msmouse") == 0 ||
strcmp(filename, "wctablet") == 0 ||
strcmp(filename, "braille") == 0 ||
strcmp(filename, "testdev") == 0 ||
strcmp(filename, "stdio") == 0) {

configure

@@ -3378,7 +3378,7 @@ fi
fdt_required=no
for target in $target_list; do
case $target in
aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu)
aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
fdt_required=yes
;;
esac
@@ -3396,11 +3396,11 @@ fi
if test "$fdt" != "no" ; then
fdt_libs="-lfdt"
# explicitly check for libfdt_env.h as it is missing in some stable installs
# and test for required functions to make sure we are on a version >= 1.4.0
# and test for required functions to make sure we are on a version >= 1.4.2
cat > $TMPC << EOF
#include <libfdt.h>
#include <libfdt_env.h>
int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; }
int main(void) { fdt_first_subnode(0, 0); return 0; }
EOF
if compile_prog "" "$fdt_libs" ; then
# system DTC is good - use it
@@ -3418,7 +3418,7 @@ EOF
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# have neither and want - prompt for system/submodule install
error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \
error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:" \
" (1) Preferred: Install the DTC (libfdt) devel package" \
" (2) Fetch the DTC submodule, using:" \
" git submodule update --init dtc"
@@ -5843,7 +5843,7 @@ target_name=$(echo $target | cut -d '-' -f 1)
target_bigendian="no"
case "$target_name" in
armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or1k|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
target_bigendian=yes
;;
esac
@@ -5879,6 +5879,7 @@ mkdir -p $target_dir
echo "# Automatically generated by configure - do not modify" > $config_target_mak
bflt="no"
mttcg="no"
interp_prefix1=$(echo "$interp_prefix" | sed "s/%M/$target_name/g")
gdb_xml_files=""
@@ -5897,11 +5898,13 @@ case "$target_name" in
arm|armeb)
TARGET_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
aarch64)
TARGET_BASE_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
cris)
@@ -5937,7 +5940,7 @@ case "$target_name" in
;;
nios2)
;;
or32)
or1k)
TARGET_ARCH=openrisc
TARGET_BASE_ARCH=openrisc
;;
@@ -6066,6 +6069,9 @@ if test "$target_bigendian" = "yes" ; then
fi
if test "$target_softmmu" = "yes" ; then
echo "CONFIG_SOFTMMU=y" >> $config_target_mak
if test "$mttcg" = "yes" ; then
echo "TARGET_SUPPORTS_MTTCG=y" >> $config_target_mak
fi
fi
if test "$target_user_only" = "yes" ; then
echo "CONFIG_USER_ONLY=y" >> $config_target_mak
@@ -6145,7 +6151,7 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
nios2)
disas_config "NIOS2"
;;
or32)
or1k)
disas_config "OPENRISC"
;;
ppc*)


@@ -23,9 +23,6 @@
#include "exec/exec-all.h"
#include "exec/memory-internal.h"
bool exit_request;
CPUState *tcg_current_cpu;
/* exit the current TB, but without causing any exception to be raised */
void cpu_loop_exit_noexc(CPUState *cpu)
{


@@ -29,6 +29,7 @@
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
@@ -227,20 +228,43 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
static void cpu_exec_step(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
tb_phys_invalidate(tb, -1);
tb_free(tb);
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
mmap_lock();
tb_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
tb_unlock();
mmap_unlock();
cc->cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
cc->cpu_exec_exit(cpu);
tb_lock();
tb_phys_invalidate(tb, -1);
tb_free(tb);
tb_unlock();
} else {
/* We may have exited due to another problem here, so we need
* to reset any tb_locks we may have taken but didn't release.
* The mmap_lock is dropped by tb_gen_code if it runs out of
* memory.
*/
#ifndef CONFIG_SOFTMMU
tcg_debug_assert(!have_mmap_lock());
#endif
tb_lock_reset();
}
}
void cpu_exec_step_atomic(CPUState *cpu)
@@ -384,12 +408,13 @@ static inline bool cpu_handle_halt(CPUState *cpu)
if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
&& replay_interrupt()) {
X86CPU *x86_cpu = X86_CPU(cpu);
qemu_mutex_lock_iothread();
apic_poll_irq(x86_cpu->apic_state);
cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
qemu_mutex_unlock_iothread();
}
#endif
if (!cpu_has_work(cpu)) {
current_cpu = NULL;
return true;
}
@@ -439,7 +464,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
#else
if (replay_exception()) {
CPUClass *cc = CPU_GET_CLASS(cpu);
qemu_mutex_lock_iothread();
cc->do_interrupt(cpu);
qemu_mutex_unlock_iothread();
cpu->exception_index = -1;
} else if (!replay_has_interrupt()) {
/* give a chance to iothread in replay mode */
@@ -461,13 +488,15 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
return false;
}
static inline void cpu_handle_interrupt(CPUState *cpu,
static inline bool cpu_handle_interrupt(CPUState *cpu,
TranslationBlock **last_tb)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
int interrupt_request = cpu->interrupt_request;
if (unlikely(interrupt_request)) {
if (unlikely(atomic_read(&cpu->interrupt_request))) {
int interrupt_request;
qemu_mutex_lock_iothread();
interrupt_request = cpu->interrupt_request;
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
/* Mask out external interrupts for this step. */
interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@@ -475,7 +504,8 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
if (interrupt_request & CPU_INTERRUPT_DEBUG) {
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
cpu->exception_index = EXCP_DEBUG;
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
/* Do nothing */
@@ -484,23 +514,26 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
cpu->halted = 1;
cpu->exception_index = EXCP_HLT;
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#if defined(TARGET_I386)
else if (interrupt_request & CPU_INTERRUPT_INIT) {
X86CPU *x86_cpu = X86_CPU(cpu);
CPUArchState *env = &x86_cpu->env;
replay_interrupt();
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0);
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
do_cpu_init(x86_cpu);
cpu->exception_index = EXCP_HALTED;
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#else
else if (interrupt_request & CPU_INTERRUPT_RESET) {
replay_interrupt();
cpu_reset(cpu);
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#endif
/* The target hook has 3 exit conditions:
@@ -522,12 +555,19 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
the program flow was changed */
*last_tb = NULL;
}
/* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
qemu_mutex_unlock_iothread();
}
if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
atomic_set(&cpu->exit_request, 0);
cpu->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cpu);
return true;
}
return false;
}
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
@@ -542,21 +582,19 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
trace_exec_tb(tb, tb->pc);
ret = cpu_tb_exec(cpu, tb);
*last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
*tb_exit = ret & TB_EXIT_MASK;
switch (*tb_exit) {
case TB_EXIT_REQUESTED:
/* Something asked us to stop executing
* chained TBs; just continue round the main
* loop. Whatever requested the exit will also
* have set something else (eg exit_request or
* interrupt_request) which we will handle
* next time around the loop. But we need to
* ensure the tcg_exit_req read in generated code
* comes before the next read of cpu->exit_request
* or cpu->interrupt_request.
/* Something asked us to stop executing chained TBs; just
* continue round the main loop. Whatever requested the exit
* will also have set something else (eg interrupt_request)
* which we will handle next time around the loop. But we
* need to ensure the tcg_exit_req read in generated code
* comes before the next read of cpu->exit_request or
* cpu->interrupt_request.
*/
smp_rmb();
smp_mb();
*last_tb = NULL;
break;
case TB_EXIT_ICOUNT_EXPIRED:
@@ -566,6 +604,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
abort();
#else
int insns_left = cpu->icount_decr.u32;
*last_tb = NULL;
if (cpu->icount_extra && insns_left >= 0) {
/* Refill decrementer and continue execution. */
cpu->icount_extra += insns_left;
@@ -575,17 +614,17 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
} else {
if (insns_left > 0) {
/* Execute remaining instructions. */
cpu_exec_nocache(cpu, insns_left, *last_tb, false);
cpu_exec_nocache(cpu, insns_left, tb, false);
align_clocks(sc, cpu);
}
cpu->exception_index = EXCP_INTERRUPT;
*last_tb = NULL;
cpu_loop_exit(cpu);
}
break;
#endif
}
default:
*last_tb = tb;
break;
}
}
@@ -605,13 +644,8 @@ int cpu_exec(CPUState *cpu)
return EXCP_HALTED;
}
atomic_mb_set(&tcg_current_cpu, cpu);
rcu_read_lock();
if (unlikely(atomic_mb_read(&exit_request))) {
cpu->exit_request = 1;
}
cc->cpu_exec_enter(cpu);
/* Calculate difference between guest clock and host clock.
@@ -621,50 +655,43 @@ int cpu_exec(CPUState *cpu)
*/
init_delay_params(&sc, cpu);
for(;;) {
/* prepare setjmp context for exception handling */
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
TranslationBlock *tb, *last_tb = NULL;
int tb_exit = 0;
/* if an exception is pending, we execute it here */
if (cpu_handle_exception(cpu, &ret)) {
break;
}
for(;;) {
cpu_handle_interrupt(cpu, &last_tb);
tb = tb_find(cpu, last_tb, tb_exit);
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);
} /* for(;;) */
} else {
/* prepare setjmp context for exception handling */
if (sigsetjmp(cpu->jmp_env, 0) != 0) {
#if defined(__clang__) || !QEMU_GNUC_PREREQ(4, 6)
/* Some compilers wrongly smash all local variables after
* siglongjmp. There were bug reports for gcc 4.5.0 and clang.
* Reload essential local variables here for those compilers.
* Newer versions of gcc would complain about this code (-Wclobbered). */
cpu = current_cpu;
cc = CPU_GET_CLASS(cpu);
/* Some compilers wrongly smash all local variables after
* siglongjmp. There were bug reports for gcc 4.5.0 and clang.
* Reload essential local variables here for those compilers.
* Newer versions of gcc would complain about this code (-Wclobbered). */
cpu = current_cpu;
cc = CPU_GET_CLASS(cpu);
#else /* buggy compiler */
/* Assert that the compiler does not smash local variables. */
g_assert(cpu == current_cpu);
g_assert(cc == CPU_GET_CLASS(cpu));
/* Assert that the compiler does not smash local variables. */
g_assert(cpu == current_cpu);
g_assert(cc == CPU_GET_CLASS(cpu));
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
cpu->can_do_io = 1;
tb_lock_reset();
if (qemu_mutex_iothread_locked()) {
qemu_mutex_unlock_iothread();
}
} /* for(;;) */
}
/* if an exception is pending, we execute it here */
while (!cpu_handle_exception(cpu, &ret)) {
TranslationBlock *last_tb = NULL;
int tb_exit = 0;
while (!cpu_handle_interrupt(cpu, &last_tb)) {
TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit);
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);
}
}
cc->cpu_exec_exit(cpu);
rcu_read_unlock();
/* fail safe : never use current_cpu outside cpu_exec() */
current_cpu = NULL;
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_cpu, NULL);
return ret;
}
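Stripped of the interleaved old lines, the restructured outer loop of cpu_exec() reads roughly as follows (condensed from the hunk above):

    /* sigsetjmp() recovery happens first; then exceptions, interrupts and
     * TB execution are handled by plain nested loops.
     */
    while (!cpu_handle_exception(cpu, &ret)) {
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;
        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit);
            cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
            align_clocks(&sc, cpu);   /* keep host and virtual clocks aligned */
        }
    }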

cpus.c

@@ -25,6 +25,7 @@
/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
@@ -45,6 +46,7 @@
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
@@ -150,6 +152,77 @@ typedef struct TimersState {
} TimersState;
static TimersState timers_state;
bool mttcg_enabled;
/*
* We default to false if we know other options have been enabled
* which are currently incompatible with MTTCG. Otherwise when each
* guest (target) has been updated to support:
* - atomic instructions
* - memory ordering primitives (barriers)
* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
*
* Once a guest architecture has been converted to the new primitives
* there are two remaining limitations to check.
*
* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
* - The host must have a stronger memory order than the guest
*
* It may be possible in future to support strong guests on weak hosts
* but that will require tagging all load/stores in a guest with their
* implicit memory order requirements which would likely slow things
* down a lot.
*/
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
return false;
#endif
}
static bool default_mttcg_enabled(void)
{
QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
const char *rr = qemu_opt_get(icount_opts, "rr");
if (rr || TCG_OVERSIZED_GUEST) {
return false;
} else {
#ifdef TARGET_SUPPORTS_MTTCG
return check_tcg_memory_orders_compatible();
#else
return false;
#endif
}
}
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
const char *t = qemu_opt_get(opts, "thread");
if (t) {
if (strcmp(t, "multi") == 0) {
if (TCG_OVERSIZED_GUEST) {
error_setg(errp, "No MTTCG when guest word size > hosts");
} else {
if (!check_tcg_memory_orders_compatible()) {
error_report("Guest expects a stronger memory ordering "
"than the host provides");
error_printf("This may cause strange/hard to debug errors");
}
mttcg_enabled = true;
}
} else if (strcmp(t, "single") == 0) {
mttcg_enabled = false;
} else {
error_setg(errp, "Invalid 'thread' setting %s", t);
}
} else {
mttcg_enabled = default_mttcg_enabled();
}
}
int64_t cpu_get_icount_raw(void)
{
@@ -694,6 +767,63 @@ void configure_icount(QemuOpts *opts, Error **errp)
NANOSECONDS_PER_SECOND / 10);
}
/***********************************************************/
/* TCG vCPU kick timer
*
* The kick timer is responsible for moving single threaded vCPU
* emulation on to the next vCPU. If more than one vCPU is running a
timer event will force a cpu->exit so the next vCPU can get
* scheduled.
*
* The timer is removed if all vCPUs are idle and restarted again once
* idleness is complete.
*/
static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
static inline int64_t qemu_tcg_next_kick(void)
{
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
CPUState *cpu;
do {
cpu = atomic_mb_read(&tcg_current_rr_cpu);
if (cpu) {
cpu_exit(cpu);
}
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
static void kick_tcg_thread(void *opaque)
{
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
qemu_cpu_kick_rr_cpu();
}
static void start_tcg_kick_timer(void)
{
if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
kick_tcg_thread, NULL);
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
}
}
static void stop_tcg_kick_timer(void)
{
if (tcg_kick_vcpu_timer) {
timer_del(tcg_kick_vcpu_timer);
tcg_kick_vcpu_timer = NULL;
}
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
@@ -896,8 +1026,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;
static QemuThread io_thread;
@@ -911,7 +1039,6 @@ void qemu_init_cpu_loop(void)
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
@@ -936,28 +1063,34 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
static void qemu_wait_io_event_common(CPUState *cpu)
{
atomic_mb_set(&cpu->thread_kicked, false);
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
process_queued_cpu_work(cpu);
cpu->thread_kicked = false;
}
static bool qemu_tcg_should_sleep(CPUState *cpu)
{
if (mttcg_enabled) {
return cpu_thread_is_idle(cpu);
} else {
return all_cpu_threads_idle();
}
}
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
while (all_cpu_threads_idle()) {
while (qemu_tcg_should_sleep(cpu)) {
stop_tcg_kick_timer();
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
while (iothread_requesting_mutex) {
qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
}
start_tcg_kick_timer();
CPU_FOREACH(cpu) {
qemu_wait_io_event_common(cpu);
}
qemu_wait_io_event_common(cpu);
}
static void qemu_kvm_wait_io_event(CPUState *cpu)
@@ -1028,6 +1161,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
current_cpu = cpu;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
@@ -1036,9 +1170,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
current_cpu = cpu;
while (1) {
current_cpu = NULL;
qemu_mutex_unlock_iothread();
do {
int sig;
@@ -1049,7 +1181,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
exit(1);
}
qemu_mutex_lock_iothread();
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
@@ -1115,9 +1246,11 @@ static int tcg_cpu_exec(CPUState *cpu)
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
@@ -1150,7 +1283,16 @@ static void deal_with_unplugged_cpus(void)
}
}
static void *qemu_tcg_cpu_thread_fn(void *arg)
/* Single-threaded TCG
*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
* This is done explicitly rather than relying on side-effects
* elsewhere.
*/
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
@@ -1172,15 +1314,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
/* process any pending work */
CPU_FOREACH(cpu) {
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
}
/* process any pending work */
atomic_mb_set(&exit_request, 1);
start_tcg_kick_timer();
cpu = first_cpu;
/* process any pending work */
cpu->exit_request = 1;
while (1) {
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
@@ -1189,7 +1334,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
cpu = first_cpu;
}
for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
atomic_mb_set(&tcg_current_rr_cpu, cpu);
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
@@ -1200,22 +1348,32 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
} else if (r == EXCP_ATOMIC) {
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
break;
}
} else if (cpu->stop || cpu->stopped) {
} else if (cpu->stop) {
if (cpu->unplug) {
cpu = CPU_NEXT(cpu);
}
break;
}
} /* for cpu.. */
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
/* Pairs with smp_wmb in qemu_cpu_kick. */
atomic_mb_set(&exit_request, 0);
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_rr_cpu, NULL);
if (cpu && cpu->exit_request) {
atomic_mb_set(&cpu->exit_request, 0);
}
handle_icount_deadline();
qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
deal_with_unplugged_cpus();
}
@@ -1262,6 +1420,68 @@ static void CALLBACK dummy_apc_func(ULONG_PTR unused)
}
#endif
/* Multi-threaded TCG
*
* In the multi-threaded case each vCPU has its own thread. The TLS
* variable current_cpu can be used deep in the code to find the
* current CPUState for a given thread.
*/
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->can_do_io = 1;
current_cpu = cpu;
qemu_cond_signal(&qemu_cpu_cond);
/* process any pending work */
cpu->exit_request = 1;
while (1) {
if (cpu_can_run(cpu)) {
int r;
r = tcg_cpu_exec(cpu);
switch (r) {
case EXCP_DEBUG:
cpu_handle_guest_debug(cpu);
break;
case EXCP_HALTED:
/* during start-up the vCPU is reset and the thread is
* kicked several times. If we don't ensure we go back
* to sleep in the halted state we won't cleanly
* start-up when the vCPU is enabled.
*
* cpu->halted should ensure we sleep in wait_io_event
*/
g_assert(cpu->halted);
break;
case EXCP_ATOMIC:
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
default:
/* Ignore everything else? */
break;
}
}
handle_icount_deadline();
atomic_mb_set(&cpu->exit_request, 0);
qemu_tcg_wait_io_event(cpu);
}
return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
@@ -1287,24 +1507,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
#endif
}
static void qemu_cpu_kick_no_halt(void)
{
CPUState *cpu;
/* Ensure whatever caused the exit has reached the CPU threads before
* writing exit_request.
*/
atomic_mb_set(&exit_request, 1);
cpu = atomic_mb_read(&tcg_current_cpu);
if (cpu) {
cpu_exit(cpu);
}
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
qemu_cpu_kick_no_halt();
cpu_exit(cpu);
/* NOP unless doing single-thread RR */
qemu_cpu_kick_rr_cpu();
} else {
if (hax_enabled()) {
/*
@@ -1342,27 +1551,14 @@ bool qemu_mutex_iothread_locked(void)
void qemu_mutex_lock_iothread(void)
{
atomic_inc(&iothread_requesting_mutex);
/* In the simple case there is no need to bump the VCPU thread out of
* TCG code execution.
*/
if (!tcg_enabled() || qemu_in_vcpu_thread() ||
!first_cpu || !first_cpu->created) {
qemu_mutex_lock(&qemu_global_mutex);
atomic_dec(&iothread_requesting_mutex);
} else {
if (qemu_mutex_trylock(&qemu_global_mutex)) {
qemu_cpu_kick_no_halt();
qemu_mutex_lock(&qemu_global_mutex);
}
atomic_dec(&iothread_requesting_mutex);
qemu_cond_broadcast(&qemu_io_proceeded_cond);
}
g_assert(!qemu_mutex_iothread_locked());
qemu_mutex_lock(&qemu_global_mutex);
iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
@@ -1392,13 +1588,6 @@ void pause_all_vcpus(void)
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
if (!kvm_enabled()) {
CPU_FOREACH(cpu) {
cpu->stop = false;
cpu->stopped = true;
}
return;
}
}
while (!all_vcpus_paused()) {
@@ -1447,29 +1636,43 @@ void cpu_remove_sync(CPUState *cpu)
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
static QemuCond *tcg_halt_cond;
static QemuThread *tcg_cpu_thread;
static QemuCond *single_tcg_halt_cond;
static QemuThread *single_tcg_cpu_thread;
/* share a single thread for all cpus with TCG */
if (!tcg_cpu_thread) {
if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
tcg_halt_cond = cpu->halt_cond;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
if (qemu_tcg_mttcg_enabled()) {
/* create a thread per vCPU with TCG (MTTCG) */
parallel_cpus = true;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
} else {
/* share a single thread for all cpus with TCG */
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
qemu_thread_create(cpu->thread, thread_name,
qemu_tcg_rr_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
single_tcg_halt_cond = cpu->halt_cond;
single_tcg_cpu_thread = cpu->thread;
}
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
tcg_cpu_thread = cpu->thread;
} else {
cpu->thread = tcg_cpu_thread;
cpu->halt_cond = tcg_halt_cond;
/* For non-MTTCG cases we share the thread */
cpu->thread = single_tcg_cpu_thread;
cpu->halt_cond = single_tcg_halt_cond;
}
}
@@ -1578,6 +1781,48 @@ int vm_stop(RunState state)
return do_vm_stop(state);
}
/**
* Prepare for (re)starting the VM.
* Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
* running or in case of an error condition), 0 otherwise.
*/
int vm_prepare_start(void)
{
RunState requested;
int res = 0;
qemu_vmstop_requested(&requested);
if (runstate_is_running() && requested == RUN_STATE__MAX) {
return -1;
}
/* Ensure that a STOP/RESUME pair of events is emitted if a
* vmstop request was pending. The BLOCK_IO_ERROR event, for
* example, according to documentation is always followed by
* the STOP event.
*/
if (runstate_is_running()) {
qapi_event_send_stop(&error_abort);
res = -1;
} else {
replay_enable_events();
cpu_enable_ticks();
runstate_set(RUN_STATE_RUNNING);
vm_state_notify(1, RUN_STATE_RUNNING);
}
/* We are sending this now, but the CPUs will be resumed shortly later */
qapi_event_send_resume(&error_abort);
return res;
}
void vm_start(void)
{
if (!vm_prepare_start()) {
resume_all_vcpus();
}
}
/* does a state transition even if the VM is already stopped,
current state is forgotten forever */
int vm_stop_force_state(RunState state)

cputlb.c

@@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
@@ -57,6 +58,40 @@
} \
} while (0)
#define assert_cpu_is_self(this_cpu) do { \
if (DEBUG_TLB_GATE) { \
g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \
} \
} while (0)
/* run_on_cpu_data.target_ptr should always be big enough for a
* target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
*/
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
/* flush_all_helper: run fn across all cpus
*
* If the wait flag is set then the src cpu's helper will be queued as
* "safe" work and the loop exited creating a synchronisation point
* where all queued work will be finished before execution starts
* again.
*/
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
run_on_cpu_data d)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (cpu != src) {
async_run_on_cpu(cpu, fn, d);
}
}
}
/* statistics */
int tlb_flush_count;
@@ -65,10 +100,22 @@ int tlb_flush_count;
* flushing more entries than required is only an efficiency issue,
* not a correctness issue.
*/
void tlb_flush(CPUState *cpu)
static void tlb_flush_nocheck(CPUState *cpu)
{
CPUArchState *env = cpu->env_ptr;
/* The QOM tests will trigger tlb_flushes without setting up TCG
* so we bug out here in that case.
*/
if (!tcg_enabled()) {
return;
}
assert_cpu_is_self(cpu);
tlb_debug("(count: %d)\n", tlb_flush_count++);
tb_lock();
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
@@ -76,39 +123,117 @@ void tlb_flush(CPUState *cpu)
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
tlb_flush_count++;
tb_unlock();
atomic_mb_set(&cpu->pending_tlb_flush, 0);
}
static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
{
tlb_flush_nocheck(cpu);
}
void tlb_flush(CPUState *cpu)
{
if (cpu->created && !qemu_cpu_is_self(cpu)) {
if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
async_run_on_cpu(cpu, tlb_flush_global_async_work,
RUN_ON_CPU_NULL);
}
} else {
tlb_flush_nocheck(cpu);
}
}
void tlb_flush_all_cpus(CPUState *src_cpu)
{
const run_on_cpu_func fn = tlb_flush_global_async_work;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
fn(src_cpu, RUN_ON_CPU_NULL);
}
void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
const run_on_cpu_func fn = tlb_flush_global_async_work;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
}
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
unsigned long mmu_idx_bitmask = data.host_int;
int mmu_idx;
tlb_debug("start\n");
assert_cpu_is_self(cpu);
for (;;) {
int mmu_idx = va_arg(argp, int);
tb_lock();
if (mmu_idx < 0) {
break;
tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
tlb_debug("%d\n", mmu_idx);
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
}
tlb_debug("%d\n", mmu_idx);
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
}
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
tlb_debug("done\n");
tb_unlock();
}
void tlb_flush_by_mmuidx(CPUState *cpu, ...)
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
va_list argp;
va_start(argp, cpu);
v_tlb_flush_by_mmuidx(cpu, argp);
va_end(argp);
tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
if (!qemu_cpu_is_self(cpu)) {
uint16_t pending_flushes = idxmap;
pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
if (pending_flushes) {
tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
atomic_or(&cpu->pending_tlb_flush, pending_flushes);
async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
RUN_ON_CPU_HOST_INT(pending_flushes));
}
} else {
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(idxmap));
}
}
void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}
static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
{
if (addr == (tlb_entry->addr_read &
@@ -121,12 +246,15 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
}
}
void tlb_flush_page(CPUState *cpu, target_ulong addr)
static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr = (target_ulong) data.target_ptr;
int i;
int mmu_idx;
assert_cpu_is_self(cpu);
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
/* Check if we need to flush due to large pages. */
@@ -156,15 +284,62 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
tb_flush_jmp_cache(cpu, addr);
}
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
if (!qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_flush_page_async_work,
RUN_ON_CPU_TARGET_PTR(addr));
} else {
tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
}
}
/* As we are going to hijack the bottom bits of the page address for a
* mmuidx bit mask we need to fail to build if we can't do that
*/
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
int i, k;
va_list argp;
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
int mmu_idx;
int i;
va_start(argp, addr);
assert_cpu_is_self(cpu);
tlb_debug("addr "TARGET_FMT_lx"\n", addr);
tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
page, addr, mmu_idx_bitmap);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
/* check whether there are vltb entries that need to be flushed */
for (i = 0; i < CPU_VTLB_SIZE; i++) {
tlb_flush_entry(&env->tlb_v_table[mmu_idx][i], addr);
}
}
}
tb_flush_jmp_cache(cpu, addr);
}
static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
/* Check if we need to flush due to large pages. */
if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
@@ -172,33 +347,80 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
v_tlb_flush_by_mmuidx(cpu, argp);
va_end(argp);
return;
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
} else {
tlb_flush_page_by_mmuidx_async_work(cpu, data);
}
}
addr &= TARGET_PAGE_MASK;
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
target_ulong addr_and_mmu_idx;
for (;;) {
int mmu_idx = va_arg(argp, int);
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
if (mmu_idx < 0) {
break;
}
/* This should already be page aligned */
addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
addr_and_mmu_idx |= idxmap;
tlb_debug("idx %d\n", mmu_idx);
tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
/* check whether there are vltb entries that need to be flushed */
for (k = 0; k < CPU_VTLB_SIZE; k++) {
tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
}
if (!qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
} else {
tlb_check_page_and_flush_by_mmuidx_async_work(
cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
va_end(argp);
}
tb_flush_jmp_cache(cpu, addr);
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
target_ulong addr_and_mmu_idx;
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
/* This should already be page aligned */
addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
addr_and_mmu_idx |= idxmap;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
target_ulong addr_and_mmu_idx;
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
/* This should already be page aligned */
addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
addr_and_mmu_idx |= idxmap;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
const run_on_cpu_func fn = tlb_flush_page_async_work;
flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
fn(src, RUN_ON_CPU_TARGET_PTR(addr));
}
void tlb_flush_page_all_cpus_synced(CPUState *src,
target_ulong addr)
{
const run_on_cpu_func fn = tlb_flush_page_async_work;
flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
}
/* update the TLBs so that writes to code in the virtual page 'addr'
@@ -216,36 +438,84 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}
static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
{
return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
}
void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
/*
* Dirty write flag handling
*
* When the TCG code writes to a location it looks up the address in
* the TLB and uses that data to compute the final address. If any of
* the lower bits of the address are set then the slow path is forced.
* There are a number of reasons to do this but for normal RAM the
* most usual is detecting writes to code regions which may invalidate
* generated code.
*
* Because we want other vCPUs to respond to changes straight away we
* update the te->addr_write field atomically. If the TLB entry has
* been changed by the vCPU in the mean time we skip the update.
*
* As this function uses atomic accesses we also need to ensure
* updates to tlb_entries follow the same access rules. We don't need
* to worry about this for oversized guests as MTTCG is disabled for
* them.
*/
static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
uintptr_t length)
{
uintptr_t addr;
#if TCG_OVERSIZED_GUEST
uintptr_t addr = tlb_entry->addr_write;
if (tlb_is_dirty_ram(tlb_entry)) {
addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
addr &= TARGET_PAGE_MASK;
addr += tlb_entry->addend;
if ((addr - start) < length) {
tlb_entry->addr_write |= TLB_NOTDIRTY;
}
}
}
#else
/* paired with atomic_mb_set in tlb_set_page_with_attrs */
uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
uintptr_t addr = orig_addr;
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
ram_addr = qemu_ram_addr_from_host(ptr);
if (ram_addr == RAM_ADDR_INVALID) {
fprintf(stderr, "Bad ram pointer %p\n", ptr);
abort();
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
addr &= TARGET_PAGE_MASK;
addr += atomic_read(&tlb_entry->addend);
if ((addr - start) < length) {
uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
}
}
return ram_addr;
#endif
}
/* For atomic correctness when running MTTCG we need to use the right
* primitives when copying entries */
static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
bool atomic_set)
{
#if TCG_OVERSIZED_GUEST
*d = *s;
#else
if (atomic_set) {
d->addr_read = s->addr_read;
d->addr_code = s->addr_code;
atomic_set(&d->addend, atomic_read(&s->addend));
/* Pairs with flag setting in tlb_reset_dirty_range */
atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
} else {
d->addr_read = s->addr_read;
d->addr_write = atomic_read(&s->addr_write);
d->addr_code = s->addr_code;
d->addend = atomic_read(&s->addend);
}
#endif
}
/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
* the target vCPU). As such care needs to be taken that we don't
* dangerously race with another vCPU update. The only thing actually
* updated is the target TLB entry ->addr_write flags.
*/
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
CPUArchState *env;
@@ -283,6 +553,8 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
int i;
int mmu_idx;
assert_cpu_is_self(cpu);
vaddr &= TARGET_PAGE_MASK;
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@@ -337,11 +609,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
target_ulong address;
target_ulong code_address;
uintptr_t addend;
CPUTLBEntry *te;
CPUTLBEntry *te, *tv, tn;
hwaddr iotlb, xlat, sz;
unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
int asidx = cpu_asidx_from_attrs(cpu, attrs);
assert_cpu_is_self(cpu);
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
@@ -371,41 +644,50 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
te = &env->tlb_table[mmu_idx][index];
/* do not discard the translation in te, evict it into a victim tlb */
env->tlb_v_table[mmu_idx][vidx] = *te;
tv = &env->tlb_v_table[mmu_idx][vidx];
/* addr_write can race with tlb_reset_dirty_range */
copy_tlb_helper(tv, te, true);
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
/* refill the tlb */
env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
env->iotlb[mmu_idx][index].attrs = attrs;
te->addend = addend - vaddr;
/* Now calculate the new entry */
tn.addend = addend - vaddr;
if (prot & PAGE_READ) {
te->addr_read = address;
tn.addr_read = address;
} else {
te->addr_read = -1;
tn.addr_read = -1;
}
if (prot & PAGE_EXEC) {
te->addr_code = code_address;
tn.addr_code = code_address;
} else {
te->addr_code = -1;
tn.addr_code = -1;
}
tn.addr_write = -1;
if (prot & PAGE_WRITE) {
if ((memory_region_is_ram(section->mr) && section->readonly)
|| memory_region_is_romd(section->mr)) {
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
tn.addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
&& cpu_physical_memory_is_clean(
memory_region_get_ram_addr(section->mr) + xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
tn.addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;
tn.addr_write = address;
}
} else {
te->addr_write = -1;
}
/* Pairs with flag setting in tlb_reset_dirty_range */
copy_tlb_helper(te, &tn, true);
/* atomic_mb_set(&te->addr_write, write_address); */
}
/* Add a new TLB entry, but without specifying the memory
@@ -452,6 +734,18 @@ static void report_bad_exec(CPUState *cpu, target_ulong addr)
log_cpu_state_mask(LOG_GUEST_ERROR, cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
}
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
ram_addr = qemu_ram_addr_from_host(ptr);
if (ram_addr == RAM_ADDR_INVALID) {
error_report("Bad ram pointer %p", ptr);
abort();
}
return ram_addr;
}
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
* is actually a ram_addr_t (in system mode; the user mode emulation
@@ -495,6 +789,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
uint64_t val;
bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
@@ -503,7 +798,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
cpu->mem_io_vaddr = addr;
if (mr->global_locking) {
qemu_mutex_lock_iothread();
locked = true;
}
memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
return val;
}
@@ -514,15 +818,23 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_vaddr = addr;
cpu->mem_io_pc = retaddr;
if (mr->global_locking) {
qemu_mutex_lock_iothread();
locked = true;
}
memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
}
/* Return true if ADDR is present in the victim tlb, and has been copied
@@ -538,10 +850,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
copy_tlb_helper(&tmptlb, tlb, false);
copy_tlb_helper(tlb, vtlb, true);
copy_tlb_helper(vtlb, &tmptlb, true);
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
tmpio = *io; *io = *vio; *vio = tmpio;
return true;
}


@@ -95,6 +95,8 @@ CONFIG_VERSATILE_PCI=y
CONFIG_VERSATILE_I2C=y
CONFIG_PCI_GENERIC=y
CONFIG_VFIO_XGMAC=y
CONFIG_VFIO_AMD_XGBE=y
CONFIG_SDHCI=y
CONFIG_INTEGRATOR_DEBUG=y


@@ -10,3 +10,6 @@ CONFIG_JAZZ=y
CONFIG_G364FB=y
CONFIG_JAZZ_LED=y
CONFIG_VT82C686=y
CONFIG_MIPS_BOSTON=y
CONFIG_FITLOADER=y
CONFIG_PCI_XILINX=y


@@ -0,0 +1 @@
# Default configuration for or1k-linux-user


@@ -0,0 +1,4 @@
# Default configuration for or1k-softmmu
CONFIG_SERIAL=y
CONFIG_OPENCORES_ETH=y


@@ -1 +0,0 @@
# Default configuration for or32-linux-user


@@ -1,4 +0,0 @@
# Default configuration for or32-softmmu
CONFIG_SERIAL=y
CONFIG_OPENCORES_ETH=y


@@ -190,6 +190,7 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
s.cpu = cpu;
s.info.read_memory_func = target_read_memory;
s.info.read_memory_inner_func = NULL;
s.info.buffer_vma = code;
s.info.buffer_length = size;
s.info.print_address_func = generic_print_address;


@@ -166,8 +166,10 @@ static void dma_blk_cb(void *opaque, int ret)
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
aio_context_acquire(dbs->ctx);
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
aio_context_release(dbs->ctx);
assert(dbs->acb);
}

docs/multi-thread-tcg.txt

@@ -0,0 +1,350 @@
Copyright (c) 2015-2016 Linaro Ltd.
This work is licensed under the terms of the GNU GPL, version 2 or
later. See the COPYING file in the top-level directory.
Introduction
============
This document outlines the design for multi-threaded TCG system-mode
emulation. The current user-mode emulation mirrors the thread
structure of the translated executable. Some of the work will be
applicable to both system and linux-user emulation.
The original system-mode TCG implementation was single threaded and
dealt with multiple CPUs with simple round-robin scheduling. This
simplified a lot of things but became increasingly limited as systems
being emulated gained additional cores and per-core performance gains
for host systems started to level off.
vCPU Scheduling
===============
We introduce a new running mode where each vCPU will run on its own
user-space thread. This will be enabled by default for all FE/BE
combinations that have had the required work done to support this
safely.
In the general case of running translated code there should be no
inter-vCPU dependencies and all vCPUs should be able to run at full
speed. Synchronisation will only be required while accessing internal
shared data structures or when the emulated architecture requires a
coherent representation of the emulated machine state.
Shared Data Structures
======================
Main Run Loop
-------------
Even when there is no code being generated there are a number of
structures associated with the hot-path through the main run-loop.
These are associated with looking up the next translation block to
execute. These include:
tb_jmp_cache (per-vCPU, cache of recent jumps)
tb_ctx.htable (global hash table, phys address->tb lookup)
As TB linking only occurs when blocks are in the same page this code
is critical to performance as looking up the next TB to execute is the
most common reason to exit the generated code.
DESIGN REQUIREMENT: Make access to lookup structures safe with
multiple reader/writer threads. Minimise any lock contention to do it.
The hot-path avoids using locks where possible. The tb_jmp_cache is
updated with atomic accesses to ensure consistent results. The fall
back QHT based hash table is also designed for lockless lookups. Locks
are only taken when code generation is required or TranslationBlocks
have their block-to-block jumps patched.
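A minimal sketch of that hot path (helper and field names are illustrative rather than exact):

    /* The per-vCPU jump cache is read and published with atomics; the QHT
     * hash table lookup is itself lockless, so no lock is taken on a hit.
     */
    TranslationBlock *tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]);
    if (tb == NULL || tb->pc != pc) {
        tb = tb_htable_lookup(cpu, pc, cs_base, flags);  /* lockless QHT lookup */
        if (tb) {
            atomic_set(&cpu->tb_jmp_cache[hash], tb);    /* publish for next time */
        }
    }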
Global TCG State
----------------
We need to protect the entire code generation cycle including any post
generation patching of the translated code. This also implies a shared
translation buffer which contains code running on all cores. Any
execution path that comes to the main run loop will need to hold a
mutex for code generation. This also includes times when we need flush
code or entries from any shared lookups/caches. Structures held on a
per-vCPU basis won't need locking unless other vCPUs will need to
modify them.
DESIGN REQUIREMENT: Add locking around all code generation and TB
patching.
(Current solution)
Mainly as part of the linux-user work all code generation is
serialised with a tb_lock(). For the SoftMMU tb_lock() also takes the
place of mmap_lock() in linux-user.
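The resulting critical section mirrors the cpu_exec_step() hunk earlier in this series:

    /* All code generation is serialised under mmap_lock()/tb_lock(). */
    mmap_lock();
    tb_lock();
    tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
    tb_unlock();
    mmap_unlock();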
Translation Blocks
------------------
Currently the whole system shares a single code generation buffer
which when full will force a flush of all translations and start from
scratch again. Some operations also force a full flush of translations
including:
- debugging operations (breakpoint insertion/removal)
- some CPU helper functions
This is done with the async_safe_run_on_cpu() mechanism to ensure all
vCPUs are quiescent when changes are being made to shared global
structures.
More granular translation invalidation events are typically due
to a change of the state of a physical page:
- code modification (self modify code, patching code)
- page changes (new page mapping in linux-user mode)
While setting the invalid flag in a TranslationBlock will stop it
being used when looked up in the hot-path there are a number of other
book-keeping structures that need to be safely cleared.
Any TranslationBlocks which have been patched to jump directly to the
now invalid blocks need the jump patches reversing so they will return
to the C code.
There are a number of look-up caches that need to be properly updated
including the:
- jump lookup cache
- the physical-to-tb lookup hash table
- the global page table
The global page table (l1_map) provides a multi-level look-up for
PageDesc structures, which contain pointers to the start of a linked
list of all Translation Blocks in that page (see page_next).
Both the jump patching and the page cache involve linked lists that
the invalidated TranslationBlock needs to be removed from.
DESIGN REQUIREMENT: Safely handle invalidation of TBs
- safely patch/revert direct jumps
- remove central PageDesc lookup entries
- ensure lookup caches/hashes are safely updated
(Current solution)
The direct jumps themselves are updated atomically by the TCG
tb_set_jmp_target() code. Modifications to the linked lists that allow
searching for linked pages are done under the protection of the
tb_lock().
The global page table is protected by the tb_lock() in system-mode and
mmap_lock() in linux-user mode.
The lookup caches are updated atomically and the lookup hash uses QHT
which is designed for concurrent safe lookup.
Memory maps and TLBs
--------------------
The memory handling code is fairly critical to the speed of memory
access in the emulated system. The SoftMMU code is designed so the
hot-path can be handled entirely within translated code. This is
handled with a per-vCPU TLB structure which once populated will allow
a series of accesses to the page to occur without exiting the
translated code. It is possible to set flags in the TLB address which
will ensure the slow-path is taken for each access. This can be done
to support:
- Memory regions (dividing up access to PIO, MMIO and RAM)
- Dirty page tracking (for code gen, SMC detection, migration and display)
- Virtual TLB (for translating guest address->real address)
When the TLB tables are updated by a vCPU thread other than their own,
we need to ensure it is done in a safe way so that no inconsistent
state is seen by the owning vCPU thread.
Some operations require updating a number of vCPUs TLBs at the same
time in a synchronised manner.
DESIGN REQUIREMENTS:
- TLB Flush All/Page
- can be across-vCPUs
- cross vCPU TLB flush may need other vCPU brought to halt
- change may need to be visible to the calling vCPU immediately
- TLB Flag Update
- usually cross-vCPU
- want change to be visible as soon as possible
- TLB Update (update a CPUTLBEntry, via tlb_set_page_with_attrs)
- This is a per-vCPU table - by definition can't race
- updated by its own thread when the slow-path is forced
(Current solution)
We have updated cputlb.c to defer cross-vCPU operations with
async_run_on_cpu(), which ensures each vCPU sees a coherent state when
it next runs its work (in a few instructions' time).
A new set of operations (tlb_flush_*_all_cpus) takes an additional flag
which, when set, will force synchronisation by setting the source
vCPU's work as "safe work" and exiting the cpu run loop. This ensures
that by the time execution restarts all flush operations have completed.
TLB flag updates are all done atomically and are also protected by the
tb_lock() which is used by the functions that update the TLB in bulk.
(Known limitation)
Not really a limitation but the wait mechanism is overly strict for
some architectures which only need flushes completed by a barrier
instruction. This could be a future optimisation.
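Condensed from the cputlb.c changes earlier in this series, the deferral pattern for a remote flush is:

    /* Run locally if we are the owning vCPU, otherwise queue the flush as
     * work on the target vCPU (see tlb_flush_page in the cputlb.c hunk).
     */
    if (!qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_page_async_work,
                         RUN_ON_CPU_TARGET_PTR(addr));
    } else {
        tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
    }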
Emulated hardware state
-----------------------
Currently thanks to KVM work any access to IO memory is automatically
protected by the global iothread mutex, also known as the BQL (Big
Qemu Lock). Any IO region that doesn't use global mutex is expected to
do its own locking.
However IO memory isn't the only way emulated hardware state can be
modified. Some architectures have model specific registers that
trigger hardware emulation features. Generally any translation helper
that needs to update more than a single vCPU's state should take the
BQL.
As the BQL, or global iothread mutex is shared across the system we
push the use of the lock as far down into the TCG code as possible to
minimise contention.
(Current solution)
MMIO access automatically serialises hardware emulation by way of the
BQL. Currently ARM targets serialise all ARM_CP_IO register accesses
and also defer the reset/startup of vCPUs to the vCPU context by way
of async_run_on_cpu().
Updates to interrupt state are also protected by the BQL as they can
often be cross vCPU.
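For MMIO the lock is only taken when the region relies on global locking, as in the io_writex() change above:

    bool locked = false;
    if (mr->global_locking) {           /* device still depends on the BQL */
        qemu_mutex_lock_iothread();
        locked = true;
    }
    memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
    if (locked) {
        qemu_mutex_unlock_iothread();
    }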
Memory Consistency
==================
Between emulated guests and host systems there are a range of memory
consistency models. Even emulating weakly ordered systems on strongly
ordered hosts needs to ensure things like store-after-load re-ordering
can be prevented when the guest wants to.
Memory Barriers
---------------
Barriers (sometimes known as fences) provide a mechanism for software
to enforce a particular ordering of memory operations from the point
of view of external observers (e.g. another processor core). They can
apply to all memory operations or be restricted to just loads or stores.
The Linux kernel has an excellent write-up on the various forms of
memory barrier and the guarantees they can provide [1].
Barriers are often wrapped around synchronisation primitives to
provide explicit memory ordering semantics. However they can be used
by themselves to provide safe lockless access by ensuring, for
example, that a change to a signal flag will only be visible once the
changes to the payload are.
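For example (a minimal sketch using QEMU's portable barrier and atomic
macros from qemu/atomic.h; the variable and function names are
illustrative only):

  #include "qemu/osdep.h"
  #include "qemu/atomic.h"

  static int payload;
  static int ready;

  static void publish(int value)
  {
      payload = value;
      smp_wmb();                /* make the payload visible before the flag */
      atomic_set(&ready, 1);
  }

  static int consume(void)
  {
      if (!atomic_read(&ready)) {
          return -1;            /* nothing published yet */
      }
      smp_rmb();                /* read the flag before the payload */
      return payload;
  }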
DESIGN REQUIREMENT: Add a new tcg_memory_barrier op
This would enforce a strong load/store ordering so all loads/stores
complete at the memory barrier. On single-core non-SMP strongly
ordered backends this could become a NOP.
Aside from explicit standalone memory barrier instructions there are
also implicit memory ordering semantics which come with each guest
memory access instruction. For example all x86 loads/stores come with
fairly strong guarantees of sequential consistency whereas ARM has
special variants of load/store instructions that imply acquire/release
semantics.
In the case of a strongly ordered guest architecture being emulated on
a weakly ordered host the scope for a heavy performance impact is
quite high.
DESIGN REQUIREMENTS: Be efficient with use of memory barriers
- host systems with stronger implied guarantees can skip some barriers
- merge consecutive barriers to the strongest one
(Current solution)
The system currently has a tcg_gen_mb() which will add memory barrier
operations if code generation is being done in a parallel context. The
tcg_optimize() function attempts to merge barriers up to their
strongest form before any load/store operations. The solution was
originally developed and tested for linux-user based systems. All
backends have been converted to emit fences when required. So far the
following front-ends have been updated to emit them as well (see the
sketch after this list):
- target-i386
- target-arm
- target-aarch64
- target-alpha
- target-mips
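As a minimal sketch, a front-end translating a guest full-barrier
instruction (e.g. x86 MFENCE or an ARM DMB) emits something along
these lines; the exact flags chosen per instruction are target
specific:

  static void gen_full_barrier(void)
  {
      /* order all earlier loads/stores against all later ones */
      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
  }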
Memory Control and Maintenance
------------------------------
This includes a class of instructions for controlling system cache
behaviour. While QEMU doesn't model cache behaviour these instructions
are often seen when code modification has taken place to ensure the
changes take effect.
Synchronisation Primitives
--------------------------
There are two broad types of synchronisation primitives found in
modern ISAs: atomic instructions and exclusive regions.
The first type offers a simple atomic instruction which guarantees
that some sort of test and conditional store will be truly atomic w.r.t.
other cores sharing access to the memory. The classic example is the
x86 cmpxchg instruction.
The second type offers a pair of load/store instructions which
guarantee that a region of memory has not been touched between the
load and store instructions. An example of this is ARM's ldrex/strex
pair where the strex instruction will return a flag indicating a
successful store only if no other CPU has accessed the memory region
since the ldrex.
Traditionally TCG has generated a series of operations that work
because they are within the context of a single translation block and
so will have completed before another CPU is scheduled. However, with
the ability to have multiple threads running to emulate multiple CPUs,
we will need to explicitly expose these semantics.
DESIGN REQUIREMENTS:
- Support classic atomic instructions
- Support load/store exclusive (or load link/store conditional) pairs
- Generic enough infrastructure to support all guest architectures
CURRENT OPEN QUESTIONS:
- How problematic is the ABA problem in general?
(Current solution)
The TCG provides a number of atomic helpers (tcg_gen_atomic_*) which
can be used directly or combined to emulate other instructions like
ARM's ldrex/strex instructions. While they are susceptible to the ABA
problem, so far common guests have not implemented patterns where this
is a problem - typically they present a locking ABI which assumes
cmpxchg-like semantics.
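As a rough illustration (a hedged sketch only, loosely modelled on
what the ARM front-end does; the function name is hypothetical), a
store-exclusive can be built from the cmpxchg helper, which is exactly
where the ABA exposure comes from:

  static void gen_store_exclusive_sketch(TCGv addr, TCGv_i32 loaded_val,
                                         TCGv_i32 new_val, TCGv_i32 result,
                                         int mem_idx)
  {
      TCGv_i32 seen = tcg_temp_new_i32();

      /* atomically store new_val only if memory still holds loaded_val */
      tcg_gen_atomic_cmpxchg_i32(seen, addr, loaded_val, new_val,
                                 mem_idx, MO_TEUL);
      /* 0 on success, 1 on failure, following the strex convention */
      tcg_gen_setcond_i32(TCG_COND_NE, result, seen, loaded_val);
      tcg_temp_free_i32(seen);
  }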
The code also includes a fall-back for cases where multi-threaded TCG
ops can't work (e.g. guest atomic width > host atomic width). In this
case an EXCP_ATOMIC exit occurs and the instruction is emulated with
an exclusive lock which ensures all emulation is serialised.
While the atomic helpers look good enough for now, there may be a need
to look at solutions that more closely model the guest architecture's
semantics.
==========
[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/Documentation/memory-barriers.txt

124
docs/nvdimm.txt Normal file
View File

@@ -0,0 +1,124 @@
QEMU Virtual NVDIMM
===================
This document explains the usage of the virtual NVDIMM (vNVDIMM)
feature, which has been available since QEMU v2.6.0.
Currently QEMU only implements the persistent memory mode of the
vNVDIMM device, not the block window mode.
Basic Usage
-----------
The storage of a vNVDIMM device in QEMU is provided by a memory
backend (i.e. memory-backend-file or memory-backend-ram). A simple
way to create a vNVDIMM device at startup time is via the following
command line options:
-machine pc,nvdimm
-m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE
-object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE
-device nvdimm,id=nvdimm1,memdev=mem1
Where,
 - the "nvdimm" machine option enables the vNVDIMM feature.
 - "slots=$N" should be equal to or larger than the total number of
   normal RAM devices and vNVDIMM devices, e.g. $N should be >= 2 here.
 - "maxmem=$MAX_SIZE" should be equal to or larger than the total size
   of normal RAM devices and vNVDIMM devices, e.g. $MAX_SIZE should be
   >= $RAM_SIZE + $NVDIMM_SIZE here.
 - "object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE"
   creates a backend storage of size $NVDIMM_SIZE on the file $PATH. All
   accesses to the virtual NVDIMM device go to the file $PATH.
   "share=on/off" controls the visibility of guest writes. If
   "share=on", then guest writes will be applied to the backend
   file. If another guest uses the same backend file with option
   "share=on", then the above writes will be visible to it as well. If
   "share=off", then guest writes won't be applied to the backend
   file and thus will be invisible to other guests.
 - "device nvdimm,id=nvdimm1,memdev=mem1" creates a virtual NVDIMM
   device whose storage is provided by the above memory backend device.
Multiple vNVDIMM devices can be created if multiple pairs of "-object"
and "-device" are provided.
For the above command line options, if the guest OS has a proper NVDIMM
driver, it should be able to detect an NVDIMM device which is in the
persistent memory mode and whose size is $NVDIMM_SIZE.
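For illustration, filling in the placeholders for a guest with 4G of
normal RAM and a single 4G vNVDIMM backed by a (hypothetical) file
/images/nvdimm.img gives:

  -machine pc,nvdimm
  -m 4G,slots=2,maxmem=8G
  -object memory-backend-file,id=mem1,share=on,mem-path=/images/nvdimm.img,size=4G
  -device nvdimm,id=nvdimm1,memdev=mem1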
Note:
1. Prior to QEMU v2.8.0, if memory-backend-file is used and the actual
   backend file size is not equal to the size given by the "size" option,
   QEMU will truncate the backend file with ftruncate(2), which will
   corrupt the existing data in the backend file, especially in the
   shrink case.
   QEMU v2.8.0 and later check the backend file size against the "size"
   option. If they do not match, QEMU will report an error and abort in
   order to avoid data corruption.
2. QEMU v2.6.0 only puts a basic alignment requirement on the "size"
   option of memory-backend-file, e.g. 4KB alignment on x86. However,
   QEMU v2.7.0 adds an additional alignment requirement, which may
   require a larger value than the basic one, e.g. 2MB on x86. This
   change breaks the usage of memory-backend-file that only satisfies
   the basic alignment.
   QEMU v2.8.0 and later remove the additional alignment on non-s390x
   architectures, so the broken memory-backend-file can work again.
Label
-----
QEMU v2.7.0 and later implement label support for vNVDIMM devices. To
enable labels on a vNVDIMM device, users can simply add the
"label-size=$SZ" option to "-device nvdimm", e.g.
-device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K
Note:
1. The minimum label size is 128KB.
2. QEMU v2.7.0 and later store labels at the end of backend storage.
   If a memory backend file, which was previously used as the backend
   of a vNVDIMM device without labels, is now used for a vNVDIMM
   device with labels, the data in the label area at the end of the
   file will be inaccessible to the guest. If any useful data (e.g.
   the meta-data of the file system) was stored there, the latter
   usage may result in guest data corruption (e.g. breakage of the
   guest file system).
Hotplug
-------
QEMU v2.8.0 and later implement hotplug support for vNVDIMM
devices. Similar to RAM hotplug, vNVDIMM hotplug is accomplished by
the two monitor commands "object_add" and "device_add".
For example, the following commands add another 4GB vNVDIMM device to
the guest:
(qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=new_nvdimm.img,size=4G
(qemu) device_add nvdimm,id=nvdimm2,memdev=mem2
Note:
1. Each hotplugged vNVDIMM device consumes one memory slot. Users
   should always ensure the memory option "-m ...,slots=N" specifies
   enough slots, i.e.
     N >= number of RAM devices +
          number of statically plugged vNVDIMM devices +
          number of hotplugged vNVDIMM devices
2. A similar requirement applies to the memory option "-m ...,maxmem=M",
   i.e.
     M >= size of RAM devices +
          size of statically plugged vNVDIMM devices +
          size of hotplugged vNVDIMM devices
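A worked example (hypothetical sizes): with one 4G RAM device, one
statically plugged 4G vNVDIMM, and room for one hotplugged 4G vNVDIMM,
the guest needs slots >= 3 and maxmem >= 12G, e.g. "-m 4G,slots=3,maxmem=12G".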

View File

@@ -200,7 +200,7 @@ LEGACY-CHARDEV translates to -chardev HOST-OPTS... as follows:
* null becomes -chardev null
* pty, msmouse, braille, stdio likewise
* pty, msmouse, wctablet, braille, stdio likewise
* vc:WIDTHxHEIGHT becomes -chardev vc,width=WIDTH,height=HEIGHT

View File

@@ -1,6 +1,8 @@
\input texinfo
@setfilename qemu-ga-ref.info
@include version.texi
@exampleindent 0
@paragraphindent 0

View File

@@ -1,6 +1,8 @@
\input texinfo
@setfilename qemu-qmp-ref.info
@include version.texi
@exampleindent 0
@paragraphindent 0

View File

@@ -61,6 +61,7 @@ PCI devices (other than virtio):
1b36:0009 PCI Expander Bridge (-device pxb)
1b36:000a PCI-PCI bridge (multiseat)
1b36:000b PCIe Expander Bridge (-device pxb-pcie)
1b36:000d PCI xhci usb host adapter
All these devices are documented in docs/specs.

2
dtc

Submodule dtc updated: 65cc4d2748...ec02b34c05

14
exec.c
View File

@@ -2115,6 +2115,7 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
return;
}
vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
vaddr = cc->adjust_watchpoint_address(cpu, vaddr, len);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (cpu_watchpoint_address_matches(wp, vaddr, len)
&& (wp->flags & flags)) {
@@ -2133,9 +2134,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
}
cpu->watchpoint_hit = wp;
/* The tb_lock will be reset when cpu_loop_exit or
* cpu_loop_exit_noexc longjmp back into the cpu_exec
* main loop.
/* Both tb_lock and iothread_mutex will be reset when
* cpu_loop_exit or cpu_loop_exit_noexc longjmp
* back into the cpu_exec main loop.
*/
tb_lock();
tb_check_watchpoint(cpu);
@@ -2370,8 +2371,14 @@ static void io_mem_init(void)
memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
NULL, UINT64_MAX);
/* io_mem_notdirty calls tb_invalidate_phys_page_fast,
* which can be called without the iothread mutex.
*/
memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
NULL, UINT64_MAX);
memory_region_clear_global_locking(&io_mem_notdirty);
memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
NULL, UINT64_MAX);
}
@@ -3165,6 +3172,7 @@ void address_space_cache_destroy(MemoryRegionCache *cache)
xen_invalidate_map_cache_entry(cache->ptr);
}
memory_region_unref(cache->mr);
cache->mr = NULL;
}
/* Called from RCU critical section. This function has the same

View File

@@ -623,6 +623,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
case float_round_down:
roundIncrement = zSign ? 0x3ff : 0;
break;
case float_round_to_odd:
roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
break;
default:
abort();
}
@@ -632,8 +635,10 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
|| ( ( zExp == 0x7FD )
&& ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
) {
bool overflow_to_inf = roundingMode != float_round_to_odd &&
roundIncrement != 0;
float_raise(float_flag_overflow | float_flag_inexact, status);
return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
}
if ( zExp < 0 ) {
if (status->flush_to_zero) {
@@ -651,6 +656,13 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
if (isTiny && roundBits) {
float_raise(float_flag_underflow, status);
}
if (roundingMode == float_round_to_odd) {
/*
* For round-to-odd case, the roundIncrement depends on
* zSig which just changed.
*/
roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
}
}
}
if (roundBits) {
@@ -1149,6 +1161,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
case float_round_down:
increment = zSign && zSig2;
break;
case float_round_to_odd:
increment = !(zSig1 & 0x1) && zSig2;
break;
default:
abort();
}
@@ -1168,6 +1183,7 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
if ( ( roundingMode == float_round_to_zero )
|| ( zSign && ( roundingMode == float_round_up ) )
|| ( ! zSign && ( roundingMode == float_round_down ) )
|| (roundingMode == float_round_to_odd)
) {
return
packFloat128(
@@ -1215,6 +1231,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
case float_round_down:
increment = zSign && zSig2;
break;
case float_round_to_odd:
increment = !(zSig1 & 0x1) && zSig2;
break;
default:
abort();
}
@@ -6108,6 +6127,93 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
}
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point value
| `a' to the 64-bit unsigned integer format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic---which means in particular that the conversion is rounded
| according to the current rounding mode. If `a' is a NaN, the largest
| positive integer is returned. If the conversion overflows, the
| largest unsigned integer is returned. If 'a' is negative, the value is
| rounded and zero is returned; negative values that do not round to zero
| will raise the inexact exception.
*----------------------------------------------------------------------------*/
uint64_t float128_to_uint64(float128 a, float_status *status)
{
flag aSign;
int aExp;
int shiftCount;
uint64_t aSig0, aSig1;
aSig0 = extractFloat128Frac0(a);
aSig1 = extractFloat128Frac1(a);
aExp = extractFloat128Exp(a);
aSign = extractFloat128Sign(a);
if (aSign && (aExp > 0x3FFE)) {
float_raise(float_flag_invalid, status);
if (float128_is_any_nan(a)) {
return LIT64(0xFFFFFFFFFFFFFFFF);
} else {
return 0;
}
}
if (aExp) {
aSig0 |= LIT64(0x0001000000000000);
}
shiftCount = 0x402F - aExp;
if (shiftCount <= 0) {
if (0x403E < aExp) {
float_raise(float_flag_invalid, status);
return LIT64(0xFFFFFFFFFFFFFFFF);
}
shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
} else {
shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
}
return roundAndPackUint64(aSign, aSig0, aSig1, status);
}
uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
{
uint64_t v;
signed char current_rounding_mode = status->float_rounding_mode;
set_float_rounding_mode(float_round_to_zero, status);
v = float128_to_uint64(a, status);
set_float_rounding_mode(current_rounding_mode, status);
return v;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the 32-bit unsigned integer format. The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic except that the conversion is always rounded toward zero.
| If `a' is a NaN, the largest positive integer is returned. Otherwise,
| if the conversion overflows, the largest unsigned integer is returned.
| If 'a' is negative, the value is rounded and zero is returned; negative
| values that do not round to zero will raise the inexact exception.
*----------------------------------------------------------------------------*/
uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
{
uint64_t v;
uint32_t res;
int old_exc_flags = get_float_exception_flags(status);
v = float128_to_uint64_round_to_zero(a, status);
if (v > 0xffffffff) {
res = 0xffffffff;
} else {
return v;
}
set_float_exception_flags(old_exc_flags, status);
float_raise(float_flag_invalid, status);
return res;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the single-precision floating-point format. The conversion

207
gdbstub.c
View File

@@ -387,6 +387,60 @@ static inline void gdb_continue(GDBState *s)
#endif
}
/*
* Resume execution, per CPU actions. For user-mode emulation it's
* equivalent to gdb_continue.
*/
static int gdb_continue_partial(GDBState *s, char *newstates)
{
CPUState *cpu;
int res = 0;
#ifdef CONFIG_USER_ONLY
/*
* This is not exactly accurate, but it's an improvement compared to the
* previous situation, where only one CPU would be single-stepped.
*/
CPU_FOREACH(cpu) {
if (newstates[cpu->cpu_index] == 's') {
cpu_single_step(cpu, sstep_flags);
}
}
s->running_state = 1;
#else
int flag = 0;
if (!runstate_needs_reset()) {
if (vm_prepare_start()) {
return 0;
}
CPU_FOREACH(cpu) {
switch (newstates[cpu->cpu_index]) {
case 0:
case 1:
break; /* nothing to do here */
case 's':
cpu_single_step(cpu, sstep_flags);
cpu_resume(cpu);
flag = 1;
break;
case 'c':
cpu_resume(cpu);
flag = 1;
break;
default:
res = -1;
break;
}
}
}
if (flag) {
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
}
#endif
return res;
}
static void put_buffer(GDBState *s, const uint8_t *buf, int len)
{
#ifdef CONFIG_USER_ONLY
@@ -785,6 +839,107 @@ static int is_query_packet(const char *p, const char *query, char separator)
(p[query_len] == '\0' || p[query_len] == separator);
}
/**
* gdb_handle_vcont - Parses and handles a vCont packet.
* returns -ENOTSUP if a command is unsupported, -EINVAL or -ERANGE if there is
* a format error, 0 on success.
*/
static int gdb_handle_vcont(GDBState *s, const char *p)
{
int res, idx, signal = 0;
char cur_action;
char *newstates;
unsigned long tmp;
CPUState *cpu;
#ifdef CONFIG_USER_ONLY
int max_cpus = 1; /* global variable max_cpus exists only in system mode */
CPU_FOREACH(cpu) {
max_cpus = max_cpus <= cpu->cpu_index ? cpu->cpu_index + 1 : max_cpus;
}
#endif
/* uninitialised CPUs stay 0 */
newstates = g_new0(char, max_cpus);
/* mark valid CPUs with 1 */
CPU_FOREACH(cpu) {
newstates[cpu->cpu_index] = 1;
}
/*
* res keeps track of what error we are returning, with -ENOTSUP meaning
* that the command is unknown or unsupported, thus returning an empty
* packet, while -EINVAL and -ERANGE cause an E22 packet, due to invalid,
* or incorrect parameters passed.
*/
res = 0;
while (*p) {
if (*p++ != ';') {
res = -ENOTSUP;
goto out;
}
cur_action = *p++;
if (cur_action == 'C' || cur_action == 'S') {
cur_action = tolower(cur_action);
res = qemu_strtoul(p + 1, &p, 16, &tmp);
if (res) {
goto out;
}
signal = gdb_signal_to_target(tmp);
} else if (cur_action != 'c' && cur_action != 's') {
/* unknown/invalid/unsupported command */
res = -ENOTSUP;
goto out;
}
/* thread specification. special values: (none), -1 = all; 0 = any */
if ((p[0] == ':' && p[1] == '-' && p[2] == '1') || (p[0] != ':')) {
if (*p == ':') {
p += 3;
}
for (idx = 0; idx < max_cpus; idx++) {
if (newstates[idx] == 1) {
newstates[idx] = cur_action;
}
}
} else if (*p == ':') {
p++;
res = qemu_strtoul(p, &p, 16, &tmp);
if (res) {
goto out;
}
idx = tmp;
/* 0 means any thread, so we pick the first valid CPU */
if (!idx) {
idx = cpu_index(first_cpu);
}
/*
* If we are in user mode, the thread specified is actually a
* thread id, and not an index. We need to find the actual
* CPU first, and only then we can use its index.
*/
cpu = find_cpu(idx);
/* invalid CPU/thread specified */
if (!idx || !cpu) {
res = -EINVAL;
goto out;
}
/* only use if no previous match occurred */
if (newstates[cpu->cpu_index] == 1) {
newstates[cpu->cpu_index] = cur_action;
}
}
}
s->signal = signal;
gdb_continue_partial(s, newstates);
out:
g_free(newstates);
return res;
}
static int gdb_handle_packet(GDBState *s, const char *line_buf)
{
CPUState *cpu;
@@ -830,60 +985,20 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
return RS_IDLE;
case 'v':
if (strncmp(p, "Cont", 4) == 0) {
int res_signal, res_thread;
p += 4;
if (*p == '?') {
put_packet(s, "vCont;c;C;s;S");
break;
}
res = 0;
res_signal = 0;
res_thread = 0;
while (*p) {
int action, signal;
if (*p++ != ';') {
res = 0;
break;
}
action = *p++;
signal = 0;
if (action == 'C' || action == 'S') {
signal = gdb_signal_to_target(strtoul(p, (char **)&p, 16));
if (signal == -1) {
signal = 0;
}
} else if (action != 'c' && action != 's') {
res = 0;
break;
}
thread = 0;
if (*p == ':') {
thread = strtoull(p+1, (char **)&p, 16);
}
action = tolower(action);
if (res == 0 || (res == 'c' && action == 's')) {
res = action;
res_signal = signal;
res_thread = thread;
}
}
res = gdb_handle_vcont(s, p);
if (res) {
if (res_thread != -1 && res_thread != 0) {
cpu = find_cpu(res_thread);
if (cpu == NULL) {
put_packet(s, "E22");
break;
}
s->c_cpu = cpu;
if ((res == -EINVAL) || (res == -ERANGE)) {
put_packet(s, "E22");
break;
}
if (res == 's') {
cpu_single_step(s->c_cpu, sstep_flags);
}
s->signal = res_signal;
gdb_continue(s);
return RS_IDLE;
goto unknown_command;
}
break;
} else {

29
hmp.c
View File

@@ -1014,8 +1014,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
const char *filename = qdict_get_str(qdict, "filename");
uint64_t addr = qdict_get_int(qdict, "val");
Error *err = NULL;
int cpu_index = monitor_get_cpu_index();
qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), &err);
if (cpu_index < 0) {
monitor_printf(mon, "No CPU available\n");
return;
}
qmp_memsave(addr, size, filename, true, cpu_index, &err);
hmp_handle_error(mon, &err);
}
@@ -1338,12 +1344,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
{
const char *param = qdict_get_str(qdict, "parameter");
const char *valuestr = qdict_get_str(qdict, "value");
int64_t valuebw = 0;
uint64_t valuebw = 0;
long valueint = 0;
char *endp;
Error *err = NULL;
bool use_int_value = false;
int i;
int i, ret;
for (i = 0; i < MIGRATION_PARAMETER__MAX; i++) {
if (strcmp(param, MigrationParameter_lookup[i]) == 0) {
@@ -1379,9 +1384,9 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
break;
case MIGRATION_PARAMETER_MAX_BANDWIDTH:
p.has_max_bandwidth = true;
valuebw = qemu_strtosz(valuestr, &endp);
if (valuebw < 0 || (size_t)valuebw != valuebw
|| *endp != '\0') {
ret = qemu_strtosz_MiB(valuestr, NULL, &valuebw);
if (ret < 0 || valuebw > INT64_MAX
|| (size_t)valuebw != valuebw) {
error_setg(&err, "Invalid size %s", valuestr);
goto cleanup;
}
@@ -1552,6 +1557,7 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
BlockIOThrottle throttle = {
.has_device = true,
.device = (char *) qdict_get_str(qdict, "device"),
.bps = qdict_get_int(qdict, "bps"),
.bps_rd = qdict_get_int(qdict, "bps_rd"),
@@ -2148,10 +2154,15 @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
IOThreadInfoList *info;
IOThreadInfo *value;
for (info = info_list; info; info = info->next) {
monitor_printf(mon, "%s: thread_id=%" PRId64 "\n",
info->value->id, info->value->thread_id);
value = info->value;
monitor_printf(mon, "%s:\n", value->id);
monitor_printf(mon, " thread_id=%" PRId64 "\n", value->thread_id);
monitor_printf(mon, " poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
monitor_printf(mon, " poll-grow=%" PRId64 "\n", value->poll_grow);
monitor_printf(mon, " poll-shrink=%" PRId64 "\n", value->poll_shrink);
}
qapi_free_IOThreadInfoList(info_list);

View File

@@ -2374,7 +2374,7 @@ static void coroutine_fn v9fs_flush(void *opaque)
/*
* Wait for pdu to complete.
*/
qemu_co_queue_wait(&cancel_pdu->complete);
qemu_co_queue_wait(&cancel_pdu->complete, NULL);
cancel_pdu->cancelled = 0;
pdu_free(cancel_pdu);
}

View File

@@ -198,7 +198,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
state->dev_count = id_list->len;
state->devs = g_new0(typeof(*state->devs), state->dev_count);
for (i = 0; i < id_list->len; i++) {
state->devs[i].cpu = id_list->cpus[i].cpu;
state->devs[i].cpu = CPU(id_list->cpus[i].cpu);
state->devs[i].arch_id = id_list->cpus[i].arch_id;
}
memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state,

View File

@@ -177,6 +177,7 @@ static void clipper_machine_init(MachineClass *mc)
{
mc->desc = "Alpha DP264/CLIPPER";
mc->init = clipper_init;
mc->block_default_type = IF_IDE;
mc->max_cpus = 4;
mc->is_default = 1;
}

View File

@@ -113,9 +113,19 @@ static void write_boot_rom(DriveInfo *dinfo, hwaddr addr, size_t rom_size,
{
BlockBackend *blk = blk_by_legacy_dinfo(dinfo);
uint8_t *storage;
int64_t size;
if (rom_size > blk_getlength(blk)) {
rom_size = blk_getlength(blk);
/* The block backend size should have already been 'validated' by
* the creation of the m25p80 object.
*/
size = blk_getlength(blk);
if (size <= 0) {
error_setg(errp, "failed to get flash size");
return;
}
if (rom_size > size) {
rom_size = size;
}
storage = g_new0(uint8_t, rom_size);
@@ -138,10 +148,6 @@ static void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,
DriveInfo *dinfo = drive_get_next(IF_MTD);
qemu_irq cs_line;
/*
* FIXME: check that we are not using a flash module exceeding
* the controller segment size
*/
fl->flash = ssi_create_slave_no_init(s->spi, flashtype);
if (dinfo) {
qdev_prop_set_drive(fl->flash, "drive", blk_by_legacy_dinfo(dinfo),
@@ -200,7 +206,9 @@ static void aspeed_board_init(MachineState *machine,
/*
* create a ROM region using the default mapping window size of
* the flash module.
* the flash module. The window size is 64MB for the AST2400
* SoC and 128MB for the AST2500 SoC, which is twice as big as
* needed by the flash modules of the Aspeed machines.
*/
memory_region_init_rom(boot_rom, OBJECT(bmc), "aspeed.boot_rom",
fl->size, &error_abort);

View File

@@ -31,6 +31,7 @@
#define ASPEED_SOC_SCU_BASE 0x1E6E2000
#define ASPEED_SOC_SRAM_BASE 0x1E720000
#define ASPEED_SOC_TIMER_BASE 0x1E782000
#define ASPEED_SOC_WDT_BASE 0x1E785000
#define ASPEED_SOC_I2C_BASE 0x1E78A000
static const int uart_irqs[] = { 9, 32, 33, 34, 10 };
@@ -170,6 +171,10 @@ static void aspeed_soc_init(Object *obj)
sc->info->silicon_rev);
object_property_add_alias(obj, "ram-size", OBJECT(&s->sdmc),
"ram-size", &error_abort);
object_initialize(&s->wdt, sizeof(s->wdt), TYPE_ASPEED_WDT);
object_property_add_child(obj, "wdt", OBJECT(&s->wdt), NULL);
qdev_set_parent_bus(DEVICE(&s->wdt), sysbus_get_default());
}
static void aspeed_soc_realize(DeviceState *dev, Error **errp)
@@ -286,6 +291,14 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp)
return;
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->sdmc), 0, ASPEED_SOC_SDMC_BASE);
/* Watch dog */
object_property_set_bool(OBJECT(&s->wdt), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt), 0, ASPEED_SOC_WDT_BASE);
}
static void aspeed_soc_class_init(ObjectClass *oc, void *data)

View File

@@ -71,6 +71,8 @@ static void cubieboard_init(MachineState *machine)
memory_region_add_subregion(get_system_memory(), AW_A10_SDRAM_BASE,
&s->sdram);
/* TODO create and connect IDE devices for ide_drive_get() */
cubieboard_binfo.ram_size = machine->ram_size;
cubieboard_binfo.kernel_filename = machine->kernel_filename;
cubieboard_binfo.kernel_cmdline = machine->kernel_cmdline;
@@ -82,6 +84,8 @@ static void cubieboard_machine_init(MachineClass *mc)
{
mc->desc = "cubietech cubieboard";
mc->init = cubieboard_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("cubieboard", cubieboard_machine_init)

View File

@@ -363,6 +363,8 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, pic[82]);
}
/* TODO create and connect IDE devices for ide_drive_get() */
highbank_binfo.ram_size = ram_size;
highbank_binfo.kernel_filename = kernel_filename;
highbank_binfo.kernel_cmdline = kernel_cmdline;
@@ -405,7 +407,8 @@ static void highbank_class_init(ObjectClass *oc, void *data)
mc->desc = "Calxeda Highbank (ECX-1000)";
mc->init = highbank_init;
mc->block_default_type = IF_SCSI;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
mc->max_cpus = 4;
}
@@ -421,7 +424,8 @@ static void midway_class_init(ObjectClass *oc, void *data)
mc->desc = "Calxeda Midway (ECX-2000)";
mc->init = midway_init;
mc->block_default_type = IF_SCSI;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
mc->max_cpus = 4;
}

View File

@@ -53,6 +53,26 @@ static uint8_t integrator_spd[128] = {
0xe, 4, 0x1c, 1, 2, 0x20, 0xc0, 0, 0, 0, 0, 0x30, 0x28, 0x30, 0x28, 0x40
};
static const VMStateDescription vmstate_integratorcm = {
.name = "integratorcm",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(cm_osc, IntegratorCMState),
VMSTATE_UINT32(cm_ctrl, IntegratorCMState),
VMSTATE_UINT32(cm_lock, IntegratorCMState),
VMSTATE_UINT32(cm_auxosc, IntegratorCMState),
VMSTATE_UINT32(cm_sdram, IntegratorCMState),
VMSTATE_UINT32(cm_init, IntegratorCMState),
VMSTATE_UINT32(cm_flags, IntegratorCMState),
VMSTATE_UINT32(cm_nvflags, IntegratorCMState),
VMSTATE_UINT32(int_level, IntegratorCMState),
VMSTATE_UINT32(irq_enabled, IntegratorCMState),
VMSTATE_UINT32(fiq_enabled, IntegratorCMState),
VMSTATE_END_OF_LIST()
}
};
static uint64_t integratorcm_read(void *opaque, hwaddr offset,
unsigned size)
{
@@ -309,6 +329,18 @@ typedef struct icp_pic_state {
qemu_irq parent_fiq;
} icp_pic_state;
static const VMStateDescription vmstate_icp_pic = {
.name = "icp_pic",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(level, icp_pic_state),
VMSTATE_UINT32(irq_enabled, icp_pic_state),
VMSTATE_UINT32(fiq_enabled, icp_pic_state),
VMSTATE_END_OF_LIST()
}
};
static void icp_pic_update(icp_pic_state *s)
{
uint32_t flags;
@@ -438,6 +470,16 @@ typedef struct ICPCtrlRegsState {
#define ICP_INTREG_WPROT (1 << 0)
#define ICP_INTREG_CARDIN (1 << 3)
static const VMStateDescription vmstate_icp_control = {
.name = "icp_control",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(intreg_state, ICPCtrlRegsState),
VMSTATE_END_OF_LIST()
}
};
static uint64_t icp_control_read(void *opaque, hwaddr offset,
unsigned size)
{
@@ -535,27 +577,42 @@ static void integratorcp_init(MachineState *machine)
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
const char *initrd_filename = machine->initrd_filename;
char **cpustr;
ObjectClass *cpu_oc;
CPUClass *cc;
Object *cpuobj;
ARMCPU *cpu;
const char *typename;
MemoryRegion *address_space_mem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
MemoryRegion *ram_alias = g_new(MemoryRegion, 1);
qemu_irq pic[32];
DeviceState *dev, *sic, *icp;
int i;
Error *err = NULL;
if (!cpu_model) {
cpu_model = "arm926";
}
cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model);
cpustr = g_strsplit(cpu_model, ",", 2);
cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
if (!cpu_oc) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
typename = object_class_get_name(cpu_oc);
cpuobj = object_new(object_class_get_name(cpu_oc));
cc = CPU_CLASS(cpu_oc);
cc->parse_features(typename, cpustr[1], &err);
g_strfreev(cpustr);
if (err) {
error_report_err(err);
exit(1);
}
cpuobj = object_new(typename);
/* By default ARM1176 CPUs have EL3 enabled. This board does not
* currently support EL3 so the CPU EL3 property is disabled before
@@ -640,6 +697,21 @@ static void core_class_init(ObjectClass *klass, void *data)
dc->props = core_properties;
dc->realize = integratorcm_realize;
dc->vmsd = &vmstate_integratorcm;
}
static void icp_pic_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->vmsd = &vmstate_icp_pic;
}
static void icp_control_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->vmsd = &vmstate_icp_control;
}
static const TypeInfo core_info = {
@@ -655,6 +727,7 @@ static const TypeInfo icp_pic_info = {
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(icp_pic_state),
.instance_init = icp_pic_init,
.class_init = icp_pic_class_init,
};
static const TypeInfo icp_ctrl_regs_info = {
@@ -662,6 +735,7 @@ static const TypeInfo icp_ctrl_regs_info = {
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ICPCtrlRegsState),
.instance_init = icp_control_init,
.class_init = icp_control_class_init,
};
static void integratorcp_register_types(void)

View File

@@ -259,7 +259,7 @@ static void realview_init(MachineState *machine,
}
n = drive_get_max_bus(IF_SCSI);
while (n >= 0) {
pci_create_simple(pci_bus, -1, "lsi53c895a");
lsi53c895a_create(pci_bus);
n--;
}
}
@@ -443,7 +443,6 @@ static void realview_pbx_a9_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM RealView Platform Baseboard Explore for Cortex-A9";
mc->init = realview_pbx_a9_init;
mc->block_default_type = IF_SCSI;
mc->max_cpus = 4;
}

View File

@@ -998,6 +998,7 @@ static void spitzpda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3000 (Spitz) PDA (PXA270)";
mc->init = spitz_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo spitzpda_type = {
@@ -1012,6 +1013,7 @@ static void borzoipda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3100 (Borzoi) PDA (PXA270)";
mc->init = borzoi_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo borzoipda_type = {
@@ -1026,6 +1028,7 @@ static void terrierpda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3200 (Terrier) PDA (PXA270)";
mc->init = terrier_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo terrierpda_type = {

View File

@@ -21,6 +21,7 @@
#include "exec/address-spaces.h"
#include "sysemu/sysemu.h"
#include "hw/char/pl011.h"
#include "hw/misc/unimp.h"
#define GPIO_A 0
#define GPIO_B 1
@@ -1220,6 +1221,40 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model,
0x40024000, 0x40025000, 0x40026000};
static const int gpio_irq[7] = {0, 1, 2, 3, 4, 30, 31};
/* Memory map of SoC devices, from
* Stellaris LM3S6965 Microcontroller Data Sheet (rev I)
* http://www.ti.com/lit/ds/symlink/lm3s6965.pdf
*
* 40000000 wdtimer (unimplemented)
* 40002000 i2c (unimplemented)
* 40004000 GPIO
* 40005000 GPIO
* 40006000 GPIO
* 40007000 GPIO
* 40008000 SSI
* 4000c000 UART
* 4000d000 UART
* 4000e000 UART
* 40020000 i2c
* 40021000 i2c (unimplemented)
* 40024000 GPIO
* 40025000 GPIO
* 40026000 GPIO
* 40028000 PWM (unimplemented)
* 4002c000 QEI (unimplemented)
* 4002d000 QEI (unimplemented)
* 40030000 gptimer
* 40031000 gptimer
* 40032000 gptimer
* 40033000 gptimer
* 40038000 ADC
* 4003c000 analogue comparator (unimplemented)
* 40048000 ethernet
* 400fc000 hibernation module (unimplemented)
* 400fd000 flash memory control (unimplemented)
* 400fe000 system control
*/
DeviceState *gpio_dev[7], *nvic;
qemu_irq gpio_in[7][8];
qemu_irq gpio_out[7][8];
@@ -1370,6 +1405,19 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model,
}
}
}
/* Add dummy regions for the devices we don't implement yet,
* so guest accesses don't cause unlogged crashes.
*/
create_unimplemented_device("wdtimer", 0x40000000, 0x1000);
create_unimplemented_device("i2c-0", 0x40002000, 0x1000);
create_unimplemented_device("i2c-2", 0x40021000, 0x1000);
create_unimplemented_device("PWM", 0x40028000, 0x1000);
create_unimplemented_device("QEI-0", 0x4002c000, 0x1000);
create_unimplemented_device("QEI-1", 0x4002d000, 0x1000);
create_unimplemented_device("analogue-comparator", 0x4003c000, 0x1000);
create_unimplemented_device("hibernation", 0x400fc000, 0x1000);
create_unimplemented_device("flash-control", 0x400fd000, 0x1000);
}
/* FIXME: Figure out how to generate these from stellaris_boards. */

View File

@@ -263,6 +263,7 @@ static void tosapda_machine_init(MachineClass *mc)
{
mc->desc = "Sharp SL-6000 (Tosa) PDA (PXA255)";
mc->init = tosa_init;
mc->block_default_type = IF_IDE;
}
DEFINE_MACHINE("tosa", tosapda_machine_init)

View File

@@ -290,7 +290,7 @@ static void versatile_init(MachineState *machine, int board_id)
}
n = drive_get_max_bus(IF_SCSI);
while (n >= 0) {
pci_create_simple(pci_bus, -1, "lsi53c895a");
lsi53c895a_create(pci_bus);
n--;
}

View File

@@ -452,6 +452,7 @@ static int add_virtio_mmio_node(void *fdt, uint32_t acells, uint32_t scells,
acells, addr, scells, size);
qemu_fdt_setprop_cells(fdt, nodename, "interrupt-parent", intc);
qemu_fdt_setprop_cells(fdt, nodename, "interrupts", 0, irq, 1);
qemu_fdt_setprop(fdt, nodename, "dma-coherent", NULL, 0);
g_free(nodename);
if (rc) {
return -1;
@@ -751,7 +752,6 @@ static void vexpress_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM Versatile Express";
mc->init = vexpress_common_init;
mc->block_default_type = IF_SCSI;
mc->max_cpus = 4;
}

View File

@@ -90,6 +90,7 @@ static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)
aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
/* device present, functioning, decoding, not shown in UI */
aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
Aml *crs = aml_resource_template();
aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base,
@@ -135,6 +136,7 @@ static void acpi_dsdt_add_virtio(Aml *scope,
Aml *dev = aml_device("VR%02u", i);
aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005")));
aml_append(dev, aml_name_decl("_UID", aml_int(i)));
aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
Aml *crs = aml_resource_template();
aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));

View File

@@ -471,7 +471,7 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms)
CPU_FOREACH(cpu) {
armcpu = ARM_CPU(cpu);
if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU) ||
!kvm_arm_pmu_create(cpu, PPI(VIRTUAL_PMU_IRQ))) {
(kvm_enabled() && !kvm_arm_pmu_create(cpu, PPI(VIRTUAL_PMU_IRQ)))) {
return;
}
}
@@ -797,6 +797,7 @@ static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic)
qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
g_free(nodename);
}
}
@@ -928,6 +929,7 @@ static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as)
"compatible", "qemu,fw-cfg-mmio");
qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
g_free(nodename);
return fw_cfg;
}

View File

@@ -323,7 +323,6 @@ static void zynq_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx Zynq Platform Baseboard for Cortex-A9";
mc->init = zynq_init;
mc->block_default_type = IF_SCSI;
mc->max_cpus = 1;
mc->no_sdcard = 1;
}

View File

@@ -106,6 +106,8 @@ static void xlnx_ep108_init(MachineState *machine)
sysbus_connect_irq(SYS_BUS_DEVICE(&s->soc.spi[i]), 1, cs_line);
}
/* TODO create and connect IDE devices for ide_drive_get() */
xlnx_ep108_binfo.ram_size = ram_size;
xlnx_ep108_binfo.kernel_filename = machine->kernel_filename;
xlnx_ep108_binfo.kernel_cmdline = machine->kernel_cmdline;
@@ -118,6 +120,8 @@ static void xlnx_ep108_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx ZynqMP EP108 board";
mc->init = xlnx_ep108_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("xlnx-ep108", xlnx_ep108_machine_init)
@@ -126,6 +130,8 @@ static void xlnx_zcu102_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx ZynqMP ZCU102 board";
mc->init = xlnx_ep108_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("xlnx-zcu102", xlnx_zcu102_machine_init)

View File

@@ -147,7 +147,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
g_free(s);
}
static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
static bool virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOBlock *s = (VirtIOBlock *)vdev;
@@ -155,7 +155,7 @@ static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
assert(s->dataplane);
assert(s->dataplane_started);
virtio_blk_handle_vq(s, vq);
return virtio_blk_handle_vq(s, vq);
}
/* Context: QEMU global mutex held */

Some files were not shown because too many files have changed in this diff.