Compare commits

...

1012 Commits

Author SHA1 Message Date
David Gibson
125a9cb8e3 Update dtc submodule to v1.4.3
Since the last submodule update (which was v1.4.2) dtc and libfdt have
gained some features which would be useful in qemu.  There's now a v1.4.3
upstream release, so update our submodule to point to it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 17:25:32 +11:00
Alexey Kardashevskiy
a438fa121f pseries: Update SLOF firmware image
Various fixes in this update, the full list is:

  > qemu-bootlist: Take the "-boot strict=off" setting properly into account
  > virtio-scsi: initialize vring avail queue buffers
  > virtio: Remove global variables in block and 9p driver
  > Remove superfluous checkpoints in tree.fs
  > Provide "write" function in the disk-label package
  > virtio: Implement block write support
  > scsi: Add SCSI block write support
  > deblocker: Add a 'write' function
  > virtio-scsi: Fix descriptor order for SCSI WRITE commands
  > board-qemu: Add a possibility to use hvterm input instead of USB keyboard
  > Do not try to use virtio-gpu in VGA mode
  > virtio: Fix stack comment of virtio-blk-read
  > envvar: Do not read default values for /options from the NVRAM anymore
  > envvar: Set properties in /options during "(set-defaults)"

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 17:25:32 +11:00
Peter Maydell
ecb24d334a Merge remote-tracking branch 'remotes/rth/tags/pull-tgt-20170302' into staging
Queued sparc patch

# gpg: Signature made Wed 01 Mar 2017 19:53:21 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tgt-20170302:
  target/sparc: Restore ldstub of odd asis

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 22:06:41 +00:00
Peter Maydell
6835504887 Merge remote-tracking branch 'remotes/kraxel/tags/pull-audio-20170301-1' into staging
audio: replay support, sdl2 fix.

# gpg: Signature made Wed 01 Mar 2017 15:38:09 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-audio-20170301-1:
  audio/sdlaudio: Allow audio playback with SDL2
  audio: make audio poll timer deterministic
  replay: add record/replay for audio passthrough

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 20:31:49 +00:00
Peter Maydell
b49d31a05a Merge remote-tracking branch 'remotes/kraxel/tags/pull-docs-20170301-1' into staging
docs: update sample configuration files

# gpg: Signature made Wed 01 Mar 2017 13:43:34 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-docs-20170301-1:
  mach-virt: Provide sample configuration files
  q35: Improve sample configuration files

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 19:27:30 +00:00
Peter Maydell
251501a371 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170228a' into staging
Migration pull

Note: The 'postcopy: Update userfaultfd.h header' is part of
Paolo's header update and will disappear if applied after it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Tue 28 Feb 2017 12:38:34 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20170228a: (27 commits)
  postcopy: Add extra check for COPY function
  postcopy: Add doc about hugepages and postcopy
  postcopy: Check for userfault+hugepage feature
  postcopy: Update userfaultfd.h header
  postcopy: Allow hugepages
  postcopy: Send whole huge pages
  postcopy: Mask fault addresses to huge page boundary
  postcopy: Load huge pages in one go
  postcopy: Use temporary for placing zero huge pages
  postcopy: Plumb pagesize down into place helpers
  postcopy: Record largest page size
  postcopy: enhance ram_block_discard_range for hugepages
  exec: ram_block_discard_range
  postcopy: Chunk discards for hugepages
  postcopy: Transmit and compare individual page sizes
  postcopy: Transmit ram size summary word
  migration: fix use-after-free of to_dst_file
  migration: Update docs to discourage version bumps
  migration: fix id leak regression
  migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 17:39:12 +00:00
Peter Maydell
c9fc677a35 Merge remote-tracking branch 'remotes/elmarco/tags/leak-pull-request' into staging
# gpg: Signature made Wed 01 Mar 2017 09:02:53 GMT
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/leak-pull-request: (28 commits)
  tests: fix virtio-blk-test leaks
  tests: add specialized device_find function
  tests: fix usb-test leaks
  tests: allows to run single test in usb-hcd-ehci-test
  usb: release the created buses
  bus: do not unref hotplug handler
  tests: fix virtio-9p-test leaks
  tests: fix virtio-scsi-test leak
  tests: fix e1000e leaks
  tests: fix i440fx-test leaks
  tests: fix e1000-test leak
  tests: fix tco-test leaks
  tests: fix eepro100-test leak
  pc: pcihp: avoid adding ACPI_PCIHP_PROP_BSEL twice
  tests: fix ipmi-bt-test leak
  tests: fix ipmi-kcs-test leak
  tests: fix bios-tables-test leak
  tests: fix hd-geo-test leaks
  tests: fix ide-test leaks
  tests: fix vhost-user-test leaks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 15:25:37 +00:00
Peter Maydell
ab711e216b Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170301' into staging
ppc patch queue for 2017-03-01

I was hoping to get this pull request squeezed in before the soft
freeze, but I ran into some difficulties during testing.  Everything
here was at least posted before the soft freeze, so I'm hoping we can
still merge it for 2.9.

The biggest things here are:
    * Cleanups to handling of hashed page tables, that will make
      adding support for the POWER9 MMU easier
    * Cleanups to the XICS interrupt controller that will make
      implementing the powernv machine easier
    * TCG implementation of extended overflow and carry handling for
      POWER9

It also includes:
    * Increasing the CPU limit for pseries to 1024 vCPUs
    * Generating proper OF node names in qemu (making hotplug and
      coldplug logic closer together)

# gpg: Signature made Wed 01 Mar 2017 04:43:06 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170301: (50 commits)
  Add PowerPC 32-bit guest memory dump support
  ppc/xics: rename 'ICPState *' variables to 'icp'
  ppc/xics: move InterruptStatsProvider to the sPAPR machine
  ppc/xics: move ics-simple post_load under the machine
  ppc/xics: remove the XICSState classes
  ppc/xics: export the XICS init routines
  ppc/xics: move the ICP array under the sPAPR machine
  ppc/xics: register the reset handler of ICP objects
  ppc/xics: simplify spapr_dt_xics() interface
  ppc/xics: use the QOM interface to grab an ICP
  ppc/xics: move the cpu_setup() handler under the ICPState class
  ppc/xics: simplify the cpu_setup() handler
  ppc/xics: move kernel_xics_fd out of KVMXICSState
  ppc/xics: extend the QOM interface to handle ICPs
  ppc/xics: remove the XICS list of ICS
  ppc/xics: register the reset handler of ICS objects
  ppc/xics: remove xics_find_source()
  ppc/xics: use the QOM interface to resend irqs
  ppc/xics: use the QOM interface to get irqs
  ppc/xics: use the QOM interface under the sPAPR machine
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 13:50:55 +00:00
Peter Maydell
4bc0d39a2f Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Tue 28 Feb 2017 22:09:11 GMT
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to 0cd97cc built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 12:30:11 +00:00
Peter Maydell
666095c852 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 queue, 2017-02-27

"-cpu max" and query-cpu-model-expansion support for x86. This
should be the last x86 pull request before 2.9 soft freeze.

# gpg: Signature made Mon 27 Feb 2017 16:24:15 GMT
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  i386: Improve query-cpu-model-expansion full mode
  i386: Implement query-cpu-model-expansion QMP command
  i386: Define static "base" CPU model
  i386: Don't set CPUClass::cpu_def on "max" model
  i386: Make "max" model not use any host CPUID info on TCG
  i386: Create "max" CPU model
  qapi-schema: Comment about full expansion of non-migration-safe models
  i386: Reorganize and document CPUID initialization steps
  i386: Rename X86CPU::host_features to X86CPU::max_features
  i386: Add ordering field to CPUClass
  i386: Unset cannot_destroy_with_object_finalize_yet on "host" model

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 11:18:01 +00:00
Peter Maydell
f1d640524d Merge remote-tracking branch 'remotes/kraxel/tags/pull-seabios-20170228-1' into staging
seabios: update to 1.10.2 release

# gpg: Signature made Tue 28 Feb 2017 08:57:57 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-seabios-20170228-1:
  seabios: update to 1.10.2 release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 10:17:45 +00:00
Peter Maydell
d377b80338 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170301' into staging
Queued TCG patch

# gpg: Signature made Tue 28 Feb 2017 21:30:32 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170301:
  aarch64: Change ext type to TCGType to fix warnings

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 08:35:14 +00:00
Peter Maydell
b9fe31392b Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Tue 28 Feb 2017 20:35:32 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (46 commits)
  block: Add Error parameter to bdrv_append()
  block: Add Error parameter to bdrv_set_backing_hd()
  block: Assertions for resize permission
  block: Assertions for write permissions
  block: Pass BdrvChild to bdrv_aligned_preadv/pwritev and copy-on-read
  tests: Remove FIXME comments
  nbd/server: Use real permissions for NBD exports
  migration/block: Use real permissions
  hmp: Request permissions in qemu-io
  commit: Add filter-node-name to block-commit
  mirror: Add filter-node-name to blockdev-mirror
  stream: Use real permissions in streaming block job
  mirror: Use real permissions in mirror/active commit block job
  blockjob: Factor out block_job_remove_all_bdrv()
  block: Allow backing file links in change_parent_backing_link()
  block: BdrvChildRole.attach/detach() callbacks
  block: Fix pending requests check in bdrv_append()
  backup: Use real permissions in backup block job
  commit: Use real permissions for HMP 'commit'
  commit: Use real permissions in commit block job
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 23:09:46 +00:00
Peter Maydell
1e0addb682 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20170228-tag' into staging
Xen 2017/02/28

# gpg: Signature made Tue 28 Feb 2017 19:13:08 GMT
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <sstabellini@kernel.org>"
# gpg:                 aka "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20170228-tag:
  Add a new qmp command to do checkpoint, query xen replication status
  Add a new qmp command to start/stop replication

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 20:33:47 +00:00
Richard Henderson
3db010c339 target/sparc: Restore ldstub of odd asis
Fixes the booting of ss20 roms.

Cc: qemu-stable@nongnu.org
Reported-by: Michael Russo <mike@papersolve.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-03-02 06:52:43 +11:00
Peter Maydell
b28f9db1a7 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170228-1' into staging
target-arm queue:
 * raspi2: add gpio controller and sdhost controller, with
   the wiring so the guest can switch which controller the
   SD card is attached to
   (this is sufficient to get raspbian kernels to boot)
 * GICv3: support state save/restore from KVM
 * update Linux headers to 4.11
 * refactor and QOMify the ARMv7M container object

# gpg: Signature made Tue 28 Feb 2017 17:11:49 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170228-1: (21 commits)
  bcm2835: add sdhost and gpio controllers
  bcm2835_gpio: add bcm2835 gpio controller
  hw/sd: add card-reparenting function
  qdev: Have qdev_set_parent_bus() handle devices already on a bus
  hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers
  target-arm: Add GICv3CPUState in CPUARMState struct
  hw/intc/arm_gicv3_kvm: Implement get/put functions
  hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
  update Linux headers to 4.11
  update-linux-headers: update for 4.11
  stm32f205: Rename 'nvic' local to 'armv7m'
  stm32f205: Create armv7m object without using armv7m_init()
  armv7m: Split systick out from NVIC
  armv7m: Don't put core v7M devices under CONFIG_STELLARIS
  armv7m: Make bitband device take the address space to access
  armv7m: Make NVIC expose a memory region rather than mapping itself
  armv7m: Make ARMv7M object take memory region link
  armv7m: Use QOMified armv7m object in armv7m_init()
  armv7m: QOMify the armv7m container
  armv7m: Move NVICState struct definition into header
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 17:58:54 +00:00
Thomas Huth
bcf19777df audio/sdlaudio: Allow audio playback with SDL2
When compiling with SDL2, the semaphore trick used in sdlaudio.c
does not work - QEMU locks up completely in this case. To avoid
the hang and get at least some audio playback up and running (it's
a little bit crackling, but better than nothing), we can use the
SDL locking functions SDL_LockAudio() and SDL_UnlockAudio() to sync
with the sound playback thread instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1485852398-2327-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
1ffc266539 audio: make audio poll timer deterministic
This patch changes resetting strategy of the audio polling timer.
It does not change expiration time if the timer is already set.
This patch is needed to make this timer deterministic and to use execution
record/replay for audio devices.

audio_reset_timer is used in the function audio_vm_change_state_handler.
Therefore every time VM is stopped or restarted the timer will be reset
to new timeout. Virtual clock does not proceed while VM is stopped.
Therefore there is no need in resetting the timeout when VM restarts.

v2: updated commit message
v3: now using timer_mod_anticipate function (as suggested by Yurii Zubrytskyi)

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170214071510.6112.76764.stgit@PASHA-ISP
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
3d4d16f4dc replay: add record/replay for audio passthrough
This patch adds recording and replaying audio data. Is saves synchronization
information for audio out and inputs from the microphone.

v2: removed unneeded whitespace change

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170202055054.4848.94901.stgit@PASHA-ISP.lan02.inno

[ kraxel: add qemu/error-report.h include to fix osx build failure ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:11:44 +01:00
Peter Maydell
7287e3556f Merge remote-tracking branch 'remotes/gkurz/tags/cve-2016-9602-for-upstream' into staging
This pull request have all the fixes for CVE-2016-9602, so that it can
be easily picked up by downstreams, as suggested by Michel Tokarev.

# gpg: Signature made Tue 28 Feb 2017 10:21:32 GMT
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/cve-2016-9602-for-upstream: (28 commits)
  9pfs: local: drop unused code
  9pfs: local: open2: don't follow symlinks
  9pfs: local: mkdir: don't follow symlinks
  9pfs: local: mknod: don't follow symlinks
  9pfs: local: symlink: don't follow symlinks
  9pfs: local: chown: don't follow symlinks
  9pfs: local: chmod: don't follow symlinks
  9pfs: local: link: don't follow symlinks
  9pfs: local: improve error handling in link op
  9pfs: local: rename: use renameat
  9pfs: local: renameat: don't follow symlinks
  9pfs: local: lstat: don't follow symlinks
  9pfs: local: readlink: don't follow symlinks
  9pfs: local: truncate: don't follow symlinks
  9pfs: local: statfs: don't follow symlinks
  9pfs: local: utimensat: don't follow symlinks
  9pfs: local: remove: don't follow symlinks
  9pfs: local: unlinkat: don't follow symlinks
  9pfs: local: lremovexattr: don't follow symlinks
  9pfs: local: lsetxattr: don't follow symlinks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 13:53:20 +00:00
Andrea Bolognani
166d434685 mach-virt: Provide sample configuration files
These are very much like the sample configuration files
for q35, and can be used both as documentation and as
a starting point for creating your own guest.

Two sample configuration files are provided:

  * mach-virt-graphical.cfg can be used to start a
    fully-featured (USB, graphical console, etc.)
    guest that uses VirtIO devices;

  * mach-virt-serial.cfg is similar but has a minimal
    set of devices and uses the serial console.

All configuration files are fully commented and neatly
organized.

Signed-off-by: Andrea Bolognani <abologna@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1487326479-8664-3-git-send-email-abologna@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 14:40:40 +01:00
Andrea Bolognani
9ca019c1dd q35: Improve sample configuration files
Instead of having a single sample configuration file,
we now have several:

  * q35-emulated.cfg documents the default devices QEMU
    adds to a q35 guest and the additional devices that
    are pretty much guaranteed to be present in a
    physical q35-based machine;

  * q35-virtio-graphical.cfg can be used to start a
    fully-featured (USB, graphical console, audio, etc.)
    guest that uses VirtIO instead of emulated devices;

  * q35-virtio-serial.cfg is similar but has a minimal
    set of devices and uses the serial console.

All configuration files are fully commented and neatly
organized.

Signed-off-by: Andrea Bolognani <abologna@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Message-id: 1487326479-8664-2-git-send-email-abologna@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 14:40:40 +01:00
Peter Maydell
e3280ffbf5 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Tue 28 Feb 2017 12:40:00 GMT
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  .shippable: add s390x-cross target
  new: dockerfiles/debian-s390-cross

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 13:06:00 +00:00
Marc-André Lureau
80e1eea37a tests: fix virtio-blk-test leaks
Use qvirtio_pci_device_find_slot() to avoid leaking the non-hp
device. Add assert() to avoid further leaks in the future.

Use qvirtio_pci_device_free() to correctly free QVirtioPCIDevice.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:58:57 +04:00
Marc-André Lureau
c4523aae06 tests: add specialized device_find function
Allow specifying which slot to look for the device.

This will be used in the following patch to avoid leaking when multiple
devices exists and we want to lookup the hotplug one.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:57:04 +04:00
Marc-André Lureau
62030ed135 tests: fix usb-test leaks
Fix the usb tests leaks.

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
d3510ff9d7 tests: allows to run single test in usb-hcd-ehci-test
pci_init() shouldn't be a test function, but instead called before any
test. This allows to run a single test with -p /x86_64/ehci/....

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
cd7bc87868 usb: release the created buses
Leaks spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
675f22c6d3 bus: do not unref hotplug handler
Apparently, none of the bus owner give a reference to the hotplug
handler property, do not unref it on bus release.

Furthermore, a bus is allowed to be its own hotplug handler, which can
be seen in qbus_set_bus_hotplug_handler() function. However, in this
case, the reference can't be given to the property, or this will create
a cyclic dependency and the bus will never be free.

Each bus owner should manage the lifecycle of the hotplug handler.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
2b880bcdbe tests: fix virtio-9p-test leaks
Spotted by ASAN.

Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
3caab54d08 tests: fix virtio-scsi-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
448fe3c134 tests: fix e1000e leaks
Spotted by ASAN.

This hunk adds an assertion. It checks that we're finding no more than
one e1000e device: each hit allocates, but there is only one g_free().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
1bab33ab4a tests: fix i440fx-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
8829e16f5e tests: fix e1000-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
34779e8c39 tests: fix tco-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:51:25 +04:00
Marc-André Lureau
6afff1ffa3 tests: fix eepro100-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
2017-03-01 11:51:05 +04:00
Igor Mammedov
f0c9d64a68 pc: pcihp: avoid adding ACPI_PCIHP_PROP_BSEL twice
PCI hotplug for bridges was introduced only since 2.0 however
  acpi_set_bsel()->object_property_add_uint32_ptr(bus, ACPI_PCIHP_PROP_BSEL)
didn't take in account that for legacy mode (1.7) when
PCI hotplug for bridges is unavailable and ACPI_PCIHP_PROP_BSEL property
the only bus "PCI.0' has been created earlier at acpi_pcihp_init() time.

We managed to live with it only because of error rised by adding
a duplicate property in acpi_set_bsel() has been ignored which
resulted in useless leaking of just allocated (int)bus_bsel.

Issue affects only 1.7 machine type as ACPI tables supported by
QEMU were introduced at that time, but there wasn't PCI hotplug
for bridges till the next release (2.0).

Fix it by removing duplicate ACPI_PCIHP_PROP_BSEL intialization
in acpi_pcihp_init() and doing it only in one place acpi_set_pci_info().

PS:
do not ignore error returned by object_property_add_uint32_ptr()
and abort QEMU since it's programming error which should be fixed
instead of being ignored.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reported-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1470211497-116801-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[ Marc-André - Remove now unused ACPI_PCIHP_LEGACY_SIZE ]
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
2607b660e9 tests: fix ipmi-bt-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
3f5f5d04cd tests: fix ipmi-kcs-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
f11dc27bcc tests: fix bios-tables-test leak
The inside array should be free too.
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
2c8f86961b tests: fix hd-geo-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
f5aa4bdc76 tests: fix ide-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
0c0eb30260 tests: fix vhost-user-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:51:00 +04:00
Marc-André Lureau
fb6faea888 tests: fix q35-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:50:33 +04:00
Mike Nawrocki
356bb70ed1 Add PowerPC 32-bit guest memory dump support
This patch extends support for the `dump-guest-memory` command to the
32-bit PowerPC architecture. It relies on the assumption that a 64-bit
guest will not dump a 32-bit core file (and vice versa).

[dwg: I suspect this patch won't cover all cases, in particular a
32-bit machine type on a 64-bit qemu build.  However, it does strictly
more than what we had before, so might as well apply as a starting
point]

Signed-off-by: Mike Nawrocki <michael.nawrocki@gtri.gatech.edu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:53:58 +11:00
Cédric Le Goater
8e4fba203e ppc/xics: rename 'ICPState *' variables to 'icp'
'ICPState *' variables are currently named 'ss'. This is confusing, so
let's give them an appropriate name: 'icp'.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
6449da4545 ppc/xics: move InterruptStatsProvider to the sPAPR machine
It provides a better monitor output of the ICP and ICS objects, else
the objects are printed out of order.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
a7ff1212e9 ppc/xics: move ics-simple post_load under the machine
The ICS object uses a post_load() handler which is implicitly relying
on the fact that the internal state of the ICS and ICP objects has
been restored but this is not guaranteed. So, let's move the code
under the post_load() handler of the machine where we know the objects
have been fully restored.

The icp_resend() handler of the XICSFabric QOM interface is also
removed as it is now obsolete.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
e6f7e110ee ppc/xics: remove the XICSState classes
The XICSState classes are not used anymore. They have now been fully
deprecated by the XICSFabric QOM interface. Do the cleanups.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
2192a9303d ppc/xics: export the XICS init routines
There is nothing left related to the XICS object in the realize
functions of the KVMXICSState and XICSState class. So adapt the
interfaces to call these routines directly from the sPAPR machine init
sequence.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
852ad27e14 ppc/xics: move the ICP array under the sPAPR machine
This is the last step to remove the XICSState abstraction and have the
machine hold all the objects related to interrupts : ICSs and ICPs.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
20147f2fce ppc/xics: register the reset handler of ICP objects
The reset of the ICP objects is currently handled by XICS but this can
be done for each individual ICP.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
b0ec31290c ppc/xics: simplify spapr_dt_xics() interface
spapr_dt_xics() only needs the number of servers to build the device
tree nodes. Let's change the routine interface to reflect that.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b4f27d71e3 ppc/xics: use the QOM interface to grab an ICP
Also introduce a xics_icp_get() helper to simplify the changes.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
f023243432 ppc/xics: move the cpu_setup() handler under the ICPState class
The cpu_setup() handler is currently under the XICSState class but it
really belongs under ICPState as it is setting up an individual vCPU.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
bf50860d1b ppc/xics: simplify the cpu_setup() handler
The cpu_setup() handler currently takes a 'XICSState *' argument to
grab the kernel ICP file descriptor. This interface can be simplified
by using the 'xics' backlink of the ICP object.

This change is also required by subsequent patches which makes use of
the QOM interface for XICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
729f8a4f48 ppc/xics: move kernel_xics_fd out of KVMXICSState
The kernel ICP file descriptor is the only reason behind the
KVMXICSState class and it's in the way of more cleanups. Let's make it
a static for the moment and move forward.

If this is problem, we could use an attribute under the sPAPR machine
later on.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b2fc59aaf9 ppc/xics: extend the QOM interface to handle ICPs
Let's add two new handlers for ICPs. One is to get an ICP object from
a server number and a second is to resend the irqs when needed.

The icp_resend() handler is a temporary workaround needed by the
ics-simple post_load() handler. It will be removed when the post_load
portion can be done at the machine level.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
d114a66225 ppc/xics: remove the XICS list of ICS
This is not used anymore.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
c79b2fdd7b ppc/xics: register the reset handler of ICS objects
The reset of the ICS objects is currently handled by XICS but this can
be done for each individual ICS. This also reduces the use of the XICS
list of ICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
be1fe35199 ppc/xics: remove xics_find_source()
It is not used anymore now that we have the QOM interface for XICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
2cd908d0ad ppc/xics: use the QOM interface to resend irqs
Also change the ICPState 'xics' backlink to be a XICSFabric, this
removes the need of using qdev_get_machine() to get the QOM interface
in some of the routines.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
f7759e4331 ppc/xics: use the QOM interface to get irqs
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
7844e12b28 ppc/xics: use the QOM interface under the sPAPR machine
Add 'ics_get' and 'ics_resend' handlers to the sPAPR machine. These
are relatively simple for a single ICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
51b180051e ppc/xics: introduce a XICSFabric QOM interface to handle ICSs
This interface provides two simple handlers. One is to get an ICS
(Interrupt Source Controller) object from an irq number and a second
to resend the irqs when needed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b9038e7806 ppc/xics: add an InterruptStatsProvider interface to ICS and ICP objects
This is, again, to reduce the use of the list of ICS objects. Let's
make each individual ICS and ICP object an InterruptStatsProvider and
remove this same interface from XICSState.

The InterruptStatsProvider will be moved at the machine level after
the XICS cleanups are completed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
681bfaded6 ppc/xics: store the ICS object under the sPAPR machine
A list of ICS objects was introduced under the XICS object for the
PowerNV machine but, for the sPAPR machine, it brings extra complexity
as there is only a single ICS. To simplify the code, let's add the ICS
pointer under the sPAPR machine and try to reduce the use of this list
where possible.

Also, change the xics_spapr_*() routines to use an ICS object instead
of an XICSState and change their name to reflect that these are
specific to the sPAPR ICS object.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
817bb6a446 ppc/xics: remove set_nr_servers() handler from XICSStateClass
Today, the ICP (Interrupt Controller Presenter) objects are created by
the 'nr_servers' property handler of the XICS object and a class
handler. They are realized in the XICS object realize routine.

Let's simplify the process by creating the ICP objects along with the
XICS object at the machine level.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
4e4169f7a2 ppc/xics: remove set_nr_irqs() handler from XICSStateClass
Today, the ICS (Interrupt Controller Source) object is created and
realized by the init and realize routines of the XICS object, but some
of the parameters are only known at the machine level.

These parameters are passed from the sPAPR machine to the ICS object
in a rather convoluted way using property handlers and a class handler
of the XICS object. The number of irqs required to allocate the IRQ
state objects in the ICS realize routine is one of them.

Let's simplify the process by creating the ICS object along with the
XICS object at the machine level and link the ICS into the XICS list
of ICSs at this level also. In the sPAPR machine, there is only a
single ICS but that will change with the PowerNV machine.

Also, QOMify the creation of the objects and get rid of the
superfluous code.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
738d5db824 xics: XICS should not be a SysBusDevice
Currently xics - the component of the IBM POWER interrupt controller
representing the overall interrupt fabric / architecture is
represented as a descendent of SysBusDevice.  However, this is not
really correct - the xics presents nothing in MMIO space so it should
be an "unattached" device in the current QOM model.

Since this device will always be created by the machine type, not created
specifically from the command line, and because it has no migrated state
it should be safe to move it around the device composition tree.

Therefore this patch changes it to a descendent of TYPE_DEVICE, and
makes it an unattached device.  So that its reset handler still gets
called correctly, we add a qdev_set_parent_bus() to attach it to
sysbus.  It's not really clear that's correct (instead of using
register_reset()) but it appears to a common technique.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
[clg corrected problems with reset]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
[dwg folded together and updated commit message]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Greg Kurz
a8eeafda19 spapr/pci: populate PCI DT in reverse order
Since commit 1d2d974244 "spapr_pci: enumerate and add PCI device tree", QEMU
populates the PCI device tree in the opposite order compared to SLOF.

Before 1d2d974244:

Populating /pci@800000020000000
                     00 0000 (D) : 1af4 1000    virtio [ net ]
                     00 0800 (D) : 1af4 1001    virtio [ block ]
                     00 1000 (D) : 1af4 1009    virtio [ network ]
Populating /pci@800000020000000/unknown-legacy-device@2

7e5294b8 :  /pci@800000020000000
7e52b998 :  |-- ethernet@0
7e52c0c8 :  |-- scsi@1
7e52c7e8 :  +-- unknown-legacy-device@2 ok

Since 1d2d974244:

Populating /pci@800000020000000
                     00 1000 (D) : 1af4 1009    virtio [ network ]
Populating /pci@800000020000000/unknown-legacy-device@2
                     00 0800 (D) : 1af4 1001    virtio [ block ]
                     00 0000 (D) : 1af4 1000    virtio [ net ]

7e5e8118 :  /pci@800000020000000
7e5ea6a0 :  |-- unknown-legacy-device@2
7e5eadb8 :  |-- scsi@1
7e5eb4d8 :  +-- ethernet@0 ok

This behaviour change is not actually a bug since no assumptions should be
made on DT ordering. But it has no real justification either, other than
being the consequence of the way fdt_add_subnode() inserts new elements
to the front of the FDT rather than adding them to the tail.

This patch reverts to the historical SLOF ordering by walking PCI devices
in reverse order. This reconciles pseries with x86 machine types behavior.
It is expected to make things easier when porting existing applications to
power.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
(slight update to the changelog)
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
b63d043418 target/ppc: add mcrxrx instruction
mcrxrx: Move to CR from XER Extended

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
c44027ffb9 target/ppc: add ov32 flag in divide operations
Add helper_div_compute_ov() in the int_helper for updating the overflow
flags.

For Divide Word:
SO, OV, and OV32 bits reflects overflow of the 32-bit result

For Divide DoubleWord:
SO, OV, and OV32 bits reflects overflow of the 64-bit result

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
61aa9a697a target/ppc: add ov32 flag for multiply low insns
For Multiply Word:
SO, OV, and OV32 bits reflects overflow of the 32-bit result

For Multiply DoubleWord:
SO, OV, and OV32 bits reflects overflow of the 64-bit result

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
1480d71cbe target/ppc: use tcg ops for neg instruction
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
dc0ad84449 target/ppc: update overflow flags for add/sub
* SO and OV reflects overflow of the 64-bit result in 64-bit mode and
  overflow of the low-order 32-bit result in 32-bit mode

* OV32 reflects overflow of the low-order 32-bit independent of the mode

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
33903d0aa4 target/ppc: update ca32 in arithmetic substract
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
6b10d008a0 target/ppc: update ca32 in arithmetic add
Adds routine to compute ca32 - gen_op_arith_compute_ca32

For 64-bit mode use the compute ca32 routine. While for 32-bit mode, CA
and CA32 will have same value.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
dd09c36159 target/ppc: support for 32-bit carry and overflow
POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
and corresponding defines.

Moreover, CA32 is updated when CA is updated and OV32 is updated when OV
is updated.

Arithmetic instructions:
    * Addition and Substractions:

        addic, addic., subfic, addc, subfc, adde, subfe, addme, subfme,
        addze, and subfze always updates CA and CA32.

        => CA reflects the carry out of bit 0 in 64-bit mode and out of
           bit 32 in 32-bit mode.
        => CA32 reflects the carry out of bit 32 independent of the
           mode.

        => SO and OV reflects overflow of the 64-bit result in 64-bit
           mode and overflow of the low-order 32-bit result in 32-bit
           mode
        => OV32 reflects overflow of the low-order 32-bit independent of
           the mode

    * Multiply Low and Divide:

        For mulld, divd, divde, divdu and divdeu: SO, OV, and OV32 bits
        reflects overflow of the 64-bit result

        For mullw, divw, divwe, divwu and divweu: SO, OV, and OV32 bits
        reflects overflow of the 32-bit result

     * Negate with OE=1 (nego)

       For 64-bit mode if the register RA contains
       0x8000_0000_0000_0000, OV and OV32 are set to 1.

       For 32-bit mode if the register RA contains 0x8000_0000, OV and
       OV32 are set to 1.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
e78308fd39 target/ppc: Correct SDR1 masking
SDR_64_HTABORG, which indicates the bits of the SDR1 register to use for
the base of a 64-bit machine's hashed page table (HPT) isn't correct.  It
includes the top 46 bits of the register, but in fact the top 4 bits must
be zero (according to the ISA v2.07).  No actual implementation has
supported close to 2^60 bytes of physical address space, so it's kind of
irrelevant, but we might as well correct this.

In addition, although we checked for bad size values in SDR1, we never
reported an error if entirely invalid bits were set there.  Add this check
to ppc_store_sdr1().

Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Suraj Jitindar Singh
8d63351f9f target/ppc: Remove the function ppc_hash64_set_sdr1()
The function ppc_hash64_set_sdr1 basically checked the htabsize and set an
error if it was too big, otherwise it just stored the value in SPR_SDR1.

Given that the only function which calls ppc_hash64_set_sdr1() is
ppc_store_sdr1(), why not handle the checking in ppc_store_sdr1() to avoid
the extra function call. Note that ppc_store_sdr1() already stores the
value in SPR_SDR1 anyway, so we were doing it twice.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
[dwg: Remove unnecessary error temporary]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
e57ca75ce3 target/ppc: Manage external HPT via virtual hypervisor
The pseries machine type implements the behaviour of a PAPR compliant
hypervisor, without actually executing such a hypervisor on the virtual
CPU.  To do this we need some hooks in the CPU code to make hypervisor
facilities get redirected to the machine instead of emulated internally.

For hypercalls this is managed through the cpu->vhyp field, which points
to a QOM interface with a method implementing the hypercall.

For the hashed page table (HPT) - also a hypervisor resource - we use an
older hack.  CPUPPCState has an 'external_htab' field which when non-NULL
indicates that the HPT is stored in qemu memory, rather than within the
guest's address space.

For consistency - and to make some future extensions easier - this merges
the external HPT mechanism into the vhyp mechanism.  Methods are added
to vhyp for the basic operations the core hash MMU code needs: map_hptes()
and unmap_hptes() for reading the HPT, store_hpte() for updating it and
hpt_mask() to retrieve its size.

To match this, the pseries machine now sets these vhyp fields in its
existing vhyp class, rather than reaching into the cpu object to set the
external_htab field.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
36778660d7 target/ppc: Eliminate htab_base and htab_mask variables
CPUPPCState includes fields htab_base and htab_mask which store the base
address (GPA) and size (as a mask) of the guest's hashed page table (HPT).
These are set when the SDR1 register is updated.

Keeping these in sync with the SDR1 is actually a little bit fiddly, and
probably not useful for performance, since keeping them expands the size of
CPUPPCState.  It also makes some upcoming changes harder to implement.

This patch removes these fields, in favour of calculating them directly
from the SDR1 contents when necessary.

This does make a change to the behaviour of attempting to write a bad value
(invalid HPT size) to the SDR1 with an mtspr instruction.  Previously, the
bad value would be stored in SDR1 and could be retrieved with a later
mfspr, but the HPT size as used by the softmmu would be, clamped to the
allowed values.  Now, writing a bad value is treated as a no-op.  An error
message is printed in both new and old versions.

I'm not sure which behaviour, if either, matches real hardware.  I don't
think it matters that much, since it's pretty clear that if an OS writes
a bad value to SDR1, it's not going to boot.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-03-01 11:23:39 +11:00
David Gibson
7222b94a83 target/ppc: Cleanup HPTE accessors for 64-bit hash MMU
Accesses to the hashed page table (HPT) are complicated by the fact that
the HPT could be in one of three places:
   1) Within guest memory - when we're emulating a full guest CPU at the
      hardware level (e.g. powernv, mac99, g3beige)
   2) Within qemu, but outside guest memory - when we're emulating user and
      supervisor instructions within TCG, but instead of emulating
      the CPU's hypervisor mode, we just emulate a hypervisor's behaviour
      (pseries in TCG or KVM-PR)
   3) Within the host kernel - a pseries machine using KVM-HV
      acceleration.  Mostly accesses to the HPT are handled by KVM,
      but there are a few cases where qemu needs to access it via a
      special fd for the purpose.

In order to batch accesses to the fd in case (3), we use a somewhat awkward
ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case
(3) reads / releases several HPTEs from the kernel as a batch (usually a
whole PTEG).  For cases (1) & (2) it just returns an address value.  The
actual HPTE load helpers then need to interpret the returned token
differently in the 3 cases.

This patch keeps the same basic structure, but simplfiies the details.
First start_access() / stop_access() are renamed to map_hptes() and
unmap_hptes() to make their operation more obvious.  Second, map_hptes()
now always returns a qemu pointer, which can always be used in the same way
by the load_hpte() helpers.  In case (1) it comes from address_space_map()
in case (2) directly from qemu's HPT buffer and in case (3) from a
temporary buffer read from the KVM fd.

While we're at it, make things a bit more consistent in terms of types and
variable names: avoid variables named 'index' (it shadows index(3) which
can lead to confusing results), use 'hwaddr ptex' for HPTE indices and
uint64_t for each of the HPTE words, use ptex throughout the call stack
instead of pte_offset in some places (we still need that at the bottom
layer, but nowhere else).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
7d6250e3d1 target/ppc: SDR1 is a hypervisor resource
At present the SDR1 register - the base of the system's hashed page table
(HPT) - is represented as an SPR with supervisor read and write permission.
However, on CPUs which have a hypervisor mode, the SDR1 is a hypervisor
only resource.  Change the permission checking on the SPR to reflect this.

Now that this is done, we don't need to check for an external HPT executing
mtsdr1: an external HPT only applies when we're emulating the behaviour of
a hypervisor, rather than modelling the CPU's hypervisor mode internally,
so if we're permitted to execute mtsdr1, we don't have an external HPT.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
b7b0b1f13a target/ppc: Merge cpu_ppc_set_vhyp() with cpu_ppc_set_papr()
cpu_ppc_set_papr() sets up various aspects of CPU state for use with PAPR
paravirtualized guests.  However, it doesn't set the virtual hypervisor,
so callers must also call cpu_ppc_set_vhyp() so that PAPR hypercalls are
handled properly.  This is a bit silly, so fold setting the virtual
hypervisor into cpu_ppc_set_papr().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
c6404adebf pseries: Minor cleanups to HPT management hypercalls
* Standardize on 'ptex' instead of 'pte_index' for HPTE index variables
   for consistency and brevity
 * Avoid variables named 'index'; shadowing index(3) from libc can lead to
   surprising bugs if the variable is removed, because compiler errors
   might not appear for remaining references
 * Clarify index calculations in h_enter() - we have two cases, H_EXACT
   where the exact HPTE slot is given, and !H_EXACT where we search for
   an empty slot within the hash bucket.  Make the calculation more
   consistent between the cases.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
1ad9f0a464 target/ppc: Fix KVM-HV HPTE accessors
When a 'pseries' guest is running with KVM-HV, the guest's hashed page
table (HPT) is stored within the host kernel, so it is not directly
accessible to qemu.  Most of the time, qemu doesn't need to access it:
we're using the hardware MMU, and KVM itself implements the guest
hypercalls for manipulating the HPT.

However, qemu does need access to the in-KVM HPT to implement
get_phys_page_debug() for the benefit of the gdbstub, and maybe for
other debug operations.

To allow this, 7c43bca "target-ppc: Fix page table lookup with kvm
enabled" added kvmppc_hash64_read_pteg() to target/ppc/kvm.c to read
in a batch of HPTEs from the KVM table.  Unfortunately, there are a
couple of problems with this:

First, the name of the function implies it always reads a whole PTEG
from the HPT, but in fact in some cases it's used to grab individual
HPTEs (which ends up pulling 8 HPTEs, not aligned to a PTEG from the
kernel).

Second, and more importantly, the code to read the HPTEs from KVM is
simply wrong, in general.  The data from the fd that KVM provides is
designed mostly for compact migration rather than this sort of one-off
access, and so needs some decoding for this purpose.  The current code
will work in some cases, but if there are invalid HPTEs then it will
not get sane results.

This patch rewrite the HPTE reading function to have a simpler
interface (just read n HPTEs into a caller provided buffer), and to
correctly decode the stream from the kernel.

For consistency we also clean up the similar function for altering
HPTEs within KVM (introduced in c138593 "target-ppc: Update
ppc_hash64_store_hpte to support updating in-kernel htab").

Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Greg Kurz
6244bb7e58 sysemu: support up to 1024 vCPUs
Some systems can already provide more than 255 hardware threads.

Bumping the QEMU limit to 1024 seems reasonable:
- it has no visible overhead in top;
- the limit itself has no effect on hot paths.

Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
f32899de97 target/ppc: introduce helper_update_ov_legacy
Removes duplicate code and will be useful for consolidating flags

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Laurent Vivier
2530a1a5cf spapr: generate DT node names
When DT node names for PCI devices are generated by SLOF,
they are generated according to the type of the device
(for instance, ethernet for virtio-net-pci device).

Node name for hotplugged devices is generated by QEMU.
This patch adds the mechanic to QEMU to create the node
name according to the device type too.

The data structure has been roughly copied from OpenBIOS/OpenHackware,
node names from SLOF.

Example:

Hotplugging some PCI cards with QEMU monitor:

device_add virtio-tablet-pci
device_add virtio-serial-pci
device_add virtio-mouse-pci
device_add virtio-scsi-pci
device_add virtio-gpu-pci
device_add ne2k_pci
device_add nec-usb-xhci
device_add intel-hda

What we can see in linux device tree:

for dir in /proc/device-tree/pci@800000020000000/*@*/; do
    echo $dir
    cat $dir/name
    echo
done

WITHOUT this patch:

/proc/device-tree/pci@800000020000000/pci@0/
pci
/proc/device-tree/pci@800000020000000/pci@1/
pci
/proc/device-tree/pci@800000020000000/pci@2/
pci
/proc/device-tree/pci@800000020000000/pci@3/
pci
/proc/device-tree/pci@800000020000000/pci@4/
pci
/proc/device-tree/pci@800000020000000/pci@5/
pci
/proc/device-tree/pci@800000020000000/pci@6/
pci
/proc/device-tree/pci@800000020000000/pci@7/
pci

WITH this patch:

/proc/device-tree/pci@800000020000000/communication-controller@1/
communication-controller
/proc/device-tree/pci@800000020000000/display@4/
display
/proc/device-tree/pci@800000020000000/ethernet@5/
ethernet
/proc/device-tree/pci@800000020000000/input-controller@0/
input-controller
/proc/device-tree/pci@800000020000000/mouse@2/
mouse
/proc/device-tree/pci@800000020000000/multimedia-device@7/
multimedia-device
/proc/device-tree/pci@800000020000000/scsi@3/
scsi
/proc/device-tree/pci@800000020000000/usb-xhci@6/
usb-xhci

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Laurent Vivier
089f7e827d PCI: add missing classes in pci_ids.h to build device tree
To allow QEMU to add PCI entries in device tree,
we must have a more exhaustive list of PCI class IDs.

This patch synchronizes as much as possible with
pci_ids.h and add some missing IDs from SLOF.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Nikunj A Dadhania
1bd33d0d7c target/ppc: optimize gen_write_xer()
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Nikunj A Dadhania
00b7078831 target/ppc: move cpu_[read, write]_xer to cpu.c
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Mark Cave-Ayland
9b40d1ee13 Update OpenBIOS images to 0cd97cc built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-02-28 21:50:03 +00:00
Pranith Kumar
dc1eccd661 aarch64: Change ext type to TCGType to fix warnings
To fix the following warnings:

In file included from /users/pranith/qemu/tcg/tcg.c:255:
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:879:24: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_cmp(s, ext, a, b, b_const);
        ~~~~~~~~~~~    ^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:893:36: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from macro 'tcg_out_insn'
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
                                                                ^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:895:37: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from macro 'tcg_out_insn'
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
                                                                ^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:1610:27: warning: implicit conversion from enumeration type 'TCGType' (aka 'enum TCGType') to different enumeration type 'TCGMemOp' (aka 'enum TCGMemOp')
      [-Wenum-conversion]
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        ~~~~~~~~~~~~~~    ^~~

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170217154311.13920-1-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-03-01 08:28:06 +11:00
Marc-André Lureau
f3f8e81150 tests: fix endianness-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
072bdb07c5 tests: fix ptimer leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
461a862022 glib-compat: add g_test_add_data_func_full fallback
Move the fallback from qtest_add_data_func_full() to glib-compat.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
e703dcbaee timer: use an inline function for free
Similarly to allocation, do it from an inline function. This allows
tests to only use the headers for allocation/free of timer.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
14324f585d tests: fix leaks in test-io-channel-command
No need for strdup, fix leaks when socat is missing.

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: "Daniel P. Berrange" <berrange@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
dc491fead0 tests: fix qmp response leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
fc34059f08 qtest: fix a memory leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 00:09:28 +04:00
Peter Maydell
758af5e862 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20170228' into staging
Network boot for s390x. More information (and instructions
for building a s390-netboot.img) can be found at
http://wiki.qemu-project.org/Features/S390xNetworkBoot

# gpg: Signature made Tue 28 Feb 2017 11:27:18 GMT
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20170228:
  pc-bios/s390-ccw.img: rebuild image
  pc-bios/s390-ccw: Use the ccw bios to start the network boot
  s390x/ipl: Load network boot image
  s390x/ipl: Extend S390IPLState to support network boot
  elf-loader: Allow late loading of elf

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 19:52:26 +00:00
Kevin Wolf
b2c2832c61 block: Add Error parameter to bdrv_append()
Aborting on error in bdrv_append() isn't correct. This patch fixes it
and lets the callers handle failures.

Test case 085 needs a reference output update. This is caused by the
reversed order of bdrv_set_backing_hd() and change_parent_backing_link()
in bdrv_append(): When the backing file of the new node is set, the
parent nodes are still pointing to the old top, so the backing blocker
is now initialised with the node name rather than the BlockBackend name.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:51 +01:00
Kevin Wolf
12fa4af61f block: Add Error parameter to bdrv_set_backing_hd()
Not all callers of bdrv_set_backing_hd() know for sure that attaching
the backing file will be allowed by the permission system. Return the
error from the function rather than aborting.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:51 +01:00
Kevin Wolf
c8f6d58edb block: Assertions for resize permission
This adds an assertion that ensures that the necessary resize permission
has been granted before bdrv_truncate() is called.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
afa4b29323 block: Assertions for write permissions
This adds assertions that ensure that the necessary write permissions
have been granted before someone attempts to write to a node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
85c97ca7a1 block: Pass BdrvChild to bdrv_aligned_preadv/pwritev and copy-on-read
This is where we want to check the permissions, so we need to have the
BdrvChild around where they are stored.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
2807c0cd43 tests: Remove FIXME comments
Not requesting any permissions is actually correct for these test cases
because no actual I/O or other operation covered by the permission
system is performed.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
8a7ce4f933 nbd/server: Use real permissions for NBD exports
NBD can't cope with device size changes, so resize must be forbidden,
but otherwise we can tolerate anything. Depending on whether the export
is writable or not, we only require consistent reads and writes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
6f5ef23a3f migration/block: Use real permissions
Request BLK_PERM_CONSISTENT_READ for the source of block migration, and
handle potential permission errors as good as we can in this place
(which is not very good, but it matches the other failure cases).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
887354bd13 hmp: Request permissions in qemu-io
The HMP command 'qemu-io' is a bit tricky because it wants to work on
the original BlockBackend, but additional permissions could be required.
The details are explained in a comment in the code, but in summary, just
request whatever permissions the current qemu-io command needs.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
0db832f42e commit: Add filter-node-name to block-commit
Management tools need to be able to know about every node in the graph
and need a way to address them. Changing the graph structure was okay
because libvirt doesn't really manage the node level yet, but future
libvirt versions need to deal with both new and old version of qemu.

This new option to blockdev-commit allows the client to set a node-name
for the automatically inserted filter driver, and at the same time
serves as a witness for a future libvirt that this version of qemu does
automatically insert a filter driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
6cdbceb12c mirror: Add filter-node-name to blockdev-mirror
Management tools need to be able to know about every node in the graph
and need a way to address them. Changing the graph structure was okay
because libvirt doesn't really manage the node level yet, but future
libvirt versions need to deal with both new and old version of qemu.

This new option to blockdev-mirror allows the client to set a node-name
for the automatically inserted filter driver, and at the same time
serves as a witness for a future libvirt that this version of qemu does
automatically insert a filter driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
a170a91fd3 stream: Use real permissions in streaming block job
The correct permissions are relatively obvious here (and explained in
code comments). For intermediate streaming, we need to reopen the top
node read-write before creating the job now because the permissions
system catches attempts to get the BLK_PERM_WRITE_UNCHANGED permission
on a read-only node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
4ef85a9c23 mirror: Use real permissions in mirror/active commit block job
The mirror block job is mainly used for two different scenarios:
Mirroring to an otherwise unused, independent target node, or for active
commit where the target node is part of the backing chain of the source.

Similarly to the commit block job patch, we need to insert a new filter
node to keep the permissions correct during active commit.

Note that one change this implies is that job->blk points to
mirror_top_bs as its root now, and mirror_top_bs (rather than the actual
source node) contains the bs->job pointer. This requires qemu-img commit
to get the job by name now rather than just taking bs->job.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:46 +01:00
Kevin Wolf
bbc02b90bc blockjob: Factor out block_job_remove_all_bdrv()
In some cases, we want to remove op blockers on intermediate nodes
before the whole block job transaction has completed (because they block
restoring the final graph state during completion). Provide a function
for this.

The whole block job lifecycle is a bit messed up and it's hard to
actually do all things in the right order, but I'll leave simplifying
this for another day.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
3e44c8e08a block: Allow backing file links in change_parent_backing_link()
Now that the backing file child role implements .attach/.detach
callbacks, nothing prevents us from modifying the graph even if that
involves changing backing file links.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
db95dbba3b block: BdrvChildRole.attach/detach() callbacks
Backing files are somewhat special compared to other kinds of children
because they are attached and detached using bdrv_set_backing_hd()
rather than the normal set of functions, which does a few more things
like setting backing blockers, toggling the BDRV_O_NO_BACKING flag,
setting parent_bs->backing_file, etc.

These special features are a reason why change_parent_backing_link()
can't handle backing files yet. With abstracting the additional features
into .attach/.detach callbacks, we get a step closer to a function that
can actually deal with this.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
dd65a52e4a block: Fix pending requests check in bdrv_append()
bdrv_append() cares about isolation of the node that it modifies, but
not about activity in some subtree below it. Instead of using the
recursive bdrv_requests_pending(), directly check bs->in_flight, which
considers only the node in question.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
4e9e4323d5 backup: Use real permissions in backup block job
The backup block job doesn't have very complicated requirements: It
needs to read from the source and write to the target, but it's fine
with either side being changed. The only restriction is that we can't
resize the image because the job uses a cached value.

qemu-iotests 055 needs to be changed because it used a target which was
already attached to a virtio-blk device. The permission system correctly
forbids this (virtio-blk can't accept another writer with its default
share-rw=off).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
d3f0675922 commit: Use real permissions for HMP 'commit'
This is a little simpler than the commit block job because it's
synchronous and only commits into the immediate backing file, but
otherwise doing more or less the same.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
8dfba27977 commit: Use real permissions in commit block job
This is probably one of the most interesting conversions to the new
op blocker system because a commit block job intentionally leaves some
intermediate block nodes in the backing chain that aren't valid on their
own any more; only the whole chain together results in a valid view.

In order to provide the 'consistent read' permission to the parents of
the 'top' node of the commit job, a new filter block driver is inserted
above 'top' which doesn't require 'consistent read' on its backing
chain. Subsequently, the commit job can block 'consistent read' on all
intermediate nodes without causing a conflict.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
76d554e20b blockjob: Add permissions to block_job_add_bdrv()
Block jobs don't actually do I/O through the the reference they create
with block_job_add_bdrv(), but they might want to use the permisssion
system to express what the block job does to intermediate nodes. This
adds permissions to block_job_add_bdrv() to provide the means to request
permissions.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
26de9438c1 block: Add BdrvChildRole.stay_at_node
When the parents' child links are updated in bdrv_append() or
bdrv_replace_in_backing_chain(), this should affect all child links of
BlockBackends or other nodes, but not on child links held for other
purposes (like for setting permissions). This patch allows to control
the behaviour per BdrvChildRole.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
d083319fe0 block: Include details on permission errors in message
Instead of just telling that there was some conflict, we can be specific
and tell which permissions were in conflict and which way the conflict
is.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
b541155587 block: Add BdrvChildRole.get_parent_desc()
For meaningful error messages in the permission system, we need to get
some human-readable description of the parent of a BdrvChild.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
c6cc12bfa7 blockjob: Add permissions to block_job_create()
This functions creates a BlockBackend internally, so the block jobs need
to tell it what they want to do with the BB.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
dabd18f64c hw/block: Introduce share-rw qdev property
By default, don't allow another writer for block devices that are
attached to a guest device. For the cases where this setup is intended
(e.g. using a cluster filesystem on the disk), the new option can be
used to allow it.

This change affects only devices using DEFINE_BLOCK_PROPERTIES().
Devices directly using DEFINE_PROP_DRIVE() still accept writers
unconditionally.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
a17c17a274 hw/block: Request permissions
This makes all device emulations with a qdev drive property request
permissions on their BlockBackend. The only thing we block at this point
is resizing images for some devices that can't support it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
39829a01ae block: Allow error return in BlockDevOps.change_media_cb()
Some devices allow a media change between read-only and read-write
media. They need to adapt the permissions in their .change_media_cb()
implementation, which can fail. So add an Error parameter to the
function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
c62d32f503 block: Request real permissions in blk_new_open()
We can figure out the necessary permissions from the flags that the
caller passed.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
55880601d8 block: Add BDRV_O_RESIZE for blk_new_open()
blk_new_open() is a convenience function that processes flags rather
than QDict options as a simple way to just open an image file.

In order to keep it convenient in the future, it must automatically
request the necessary permissions. This can easily be inferred from the
flags for read and write, but we need another flag that tells us whether
to get the resize permission.

We can't just always request it because that means that no block jobs
can run on the resulting BlockBackend (which is something that e.g.
qemu-img commit wants to do), but we also can't request it never because
most of the .bdrv_create() implementations call blk_truncate().

The solution is to introduce another flag that is passed by all users
that want to resize the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d7086422b1 block: Add error parameter to blk_insert_bs()
Now that blk_insert_bs() requests the BlockBackend permissions for the
node it attaches to, it can fail. Instead of aborting, pass the errors
to the callers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6d0eb64d5c block: Add permissions to blk_new()
We want every user to be specific about the permissions it needs, so
we'll pass the initial permissions as parameters to blk_new(). A user
only needs to call blk_set_perm() if it wants to change the permissions
after the fact.

The permissions are stored in the BlockBackend and applied whenever a
BlockDriverState should be attached in blk_insert_bs().

This does not include actually choosing the right set of permissions
everywhere yet. Instead, the usual FIXME comment is added to each place
and will be addressed in individual patches.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
981776b348 block: Add permissions to BlockBackend
The BlockBackend can now store the permissions that its user requires.
This is necessary because nodes can be ejected from or inserted into a
BlockBackend and all of these operations must make sure that the user
still gets what it requested initially.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
f68c598be6 block: Request real permissions in bdrv_attach_child()
Now that all block drivers with children tell us what permissions they
need from each of their children, bdrv_attach_child() can use this
information and make the right requirements while trying to attach new
children.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
78e421c9fb block: Require .bdrv_child_perm() with child nodes
All block drivers that can have child nodes implement .bdrv_child_perm()
now. Make this officially a requirement by asserting that only drivers
without children can omit .bdrv_child_perm().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
91ef38257a vvfat: Implement .bdrv_child_perm()
vvfat is the last remaining driver that can have children, but doesn't
implement .bdrv_child_perm() yet. The default handlers aren't suitable
here, so let's implement a very simple driver-specific one that protects
the internal child from being used by other users as good as our
permissions permit.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
862f215fab block: Request child permissions in format drivers
This makes use of the .bdrv_child_perm() implementation for formats that
we just added. All format drivers expose the permissions they actually
need nows, so that they can be set accordingly and updated when parents
are attached or detached.

The only format not included here is raw, which was already converted
with the other filter drivers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6b1a044afb block: Default .bdrv_child_perm() for format drivers
Almost all format drivers have the same characteristics as far as
permissions are concerned: They have one or more children for storing
their own data and, more importantly, metadata (can be written to and
grow even without external write requests, must be protected against
other writers and present consistent data) and optionally a backing file
(this is just data, so like for a filter, it only depends on what the
parent nodes need).

This provides a default implementation that can be shared by most of
our format drivers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d7010dfb68 block: Request child permissions in filter drivers
All callers will have to request permissions for all of their child
nodes. Block drivers that act as simply filters can use the default
implementation of .bdrv_child_perm().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6a1b9ee152 block: Default .bdrv_child_perm() for filter drivers
Most filters need permissions related to read and write for their
children, but only if the node has a parent that wants to use the same
operation on the filter. The same is true for resize.

This adds a default implementation that simply forwards all necessary
permissions to all children of the node and leaves the other permissions
unchanged.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
33a610c398 block: Involve block drivers in permission granting
In many cases, the required permissions of one node on its children
depend on what its parents require from it. For example, the raw format
or most filter drivers only need to request consistent reads if that's
something that one of their parents wants.

In order to achieve this, this patch introduces two new BlockDriver
callbacks. The first one lets drivers first check (recursively) whether
the requested permissions can be set; the second one actually sets the
new permission bitmask.

Also add helper functions that drivers can use in their implementation
of the callbacks to update their permissions on a specific child.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d5e6f437c5 block: Let callers request permissions when attaching a child node
When attaching a node as a child to a new parent, the required and
shared permissions for this parent are checked against all other parents
of the node now, and an error is returned if there is a conflict.

This allows error returns to a function that previously always
succeeded, and the same is true for quite a few callers and their
callers. Converting all of them within the same patch would be too much,
so for now everyone tells that they don't need any permissions and allow
everyone else to do anything. This way we can use &error_abort initially
and convert caller by caller to pass actual permission requirements and
implement error handling.

All these places are marked with FIXME comments and it will be the job
of the next patches to clean them up again.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
8b2ff5291f block: Add Error argument to bdrv_attach_child()
It will have to return an error soon, so prepare the callers for it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:35 +01:00
Kevin Wolf
7006c9a761 block: Add op blocker permission constants
This patch defines the permission categories that will be used by the
new op blocker system.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:35 +01:00
Markus Armbruster
9e19ad4e49 option: Tweak invalid size error message and unbreak iotest 049
Commit 75cdcd1 neglected to update tests/qemu-iotests/049.out, and
made the error message for negative size worse.  Fix that.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-28 20:40:31 +01:00
Peter Lieven
2d9187bc65 qemu-img: make convert async
the convert process is currently completely implemented with sync operations.
That means it reads one buffer and then writes it. No parallelism and each sync
request takes as long as it takes until it is completed.

This can be a big performance hit when the convert process reads and writes
to devices which do not benefit from kernel readahead or pagecache.
In our environment we heavily have the following two use cases when using
qemu-img convert.

a) reading from NFS and writing to iSCSI for deploying templates
b) reading from iSCSI and writing to NFS for backups

In both processes we use libiscsi and libnfs so we have no kernel cache.

This patch changes the convert process to work with parallel running coroutines
which can significantly improve performance for network storage devices:

qemu-img (master)
 nfs -> iscsi 22.8 secs
 nfs -> ram   11.7 secs
 ram -> iscsi 12.3 secs

qemu-img-async (8 coroutines, in-order write disabled)
 nfs -> iscsi 11.0 secs
 nfs -> ram   10.4 secs
 ram -> iscsi  9.0 secs

This patches introduces 2 new cmdline parameters. The -m parameter to specify
the number of coroutines running in parallel (defaults to 8). And the -W parameter to
allow qemu-img to write to the target out of order rather than sequential. This improves
performance as the writes do not have to wait for each other to complete.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-28 20:40:31 +01:00
Marc-André Lureau
e7c83a885f vhost-user: delay vhost_user_stop
Since commit b0a335e351, a socket write
may trigger a disconnect events, calling vhost_user_stop() and clearing
all the vhost_dev strutures holding data that vhost.c functions expect
to remain valid. Delay the cleanup to keep the vhost_dev structure
valid during the vhost.c functions.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170227104956.24729-1-marcandre.lureau@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 19:11:15 +00:00
Zhang Chen
daa33c5215 Add a new qmp command to do checkpoint, query xen replication status
We can call this qmp command to do checkpoint outside of qemu.
Xen colo will need this function.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wencongyang@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-02-28 11:02:12 -08:00
Zhang Chen
2c9639ecab Add a new qmp command to start/stop replication
We can call this qmp command to start/stop replication outside of qemu.
Like Xen colo need this function.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wencongyang@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-02-28 11:01:56 -08:00
Peter Maydell
9514f2648c Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging
This pull request brings:
- a fix to a minor bug reported by Coverity
- throttling support in the local backend (command line only)

# gpg: Signature made Tue 28 Feb 2017 09:32:30 GMT
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/for-upstream:
  throttle: factor out duplicate code
  fsdev: add IO throttle support to fsdev devices
  9pfs: fix v9fs_lock error case

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:39:49 +00:00
Clement Deschamps
1eeb5c7dea bcm2835: add sdhost and gpio controllers
This adds the bcm2835_sdhost and bcm2835_gpio to the BCM2835 platform.

For supporting the SD controller selection (alternate function of GPIOs
48-53), the bcm2835_gpio now exposes an sdbus.
It also has a link to both the sdbus of sdhci and sdhost controllers,
and the card is reparented from one bus to another when the alternate
function of GPIOs 48-53 is modified.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-5-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-5-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Clement Deschamps
d72fc9dcb1 bcm2835_gpio: add bcm2835 gpio controller
This adds the BCM2835 GPIO controller.

It currently implements:
- The 54 GPIOs as outputs (qemu_irq)
- The SD controller selection via alternate function of GPIOs 48-53

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-4-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-4-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Clement Deschamps
97fb87cc5d hw/sd: add card-reparenting function
Provide a new function sdbus_reparent_card() in sd core for reparenting
a card from a SDBus to another one.

This function is required by the raspi platform, where the two SD
controllers can be dynamically switched.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-3-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-3-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: added a doc comment to the header file; changed to
 use new behaviour of qdev_set_parent_bus()]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Peter Maydell
91c968ac72 qdev: Have qdev_set_parent_bus() handle devices already on a bus
Instead of qdev_set_parent_bus() silently doing the wrong
thing if it's handed a device that's already on a bus,
have it remove the device from the old bus and add it to
the new one. This is useful for the raspi2 sdcard.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 1488293711-14195-2-git-send-email-peter.maydell@linaro.org
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
07a5628cb8 hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers
Reset CPU interface registers of GICv3 when CPU is reset.
For this, ARMCPRegInfo struct is registered with one ICC
register whose resetfn is called when cpu is reset.

All the ICC registers are reset under one single register
reset function instead of calling resetfn for each ICC
register.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-6-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
d3a3e52962 target-arm: Add GICv3CPUState in CPUARMState struct
Add gicv3state void pointer to CPUARMState struct
to store GICv3CPUState.

In case of usecase like CPU reset, we need to reset
GICv3CPUState of the CPU. In such scenario, this pointer
becomes handy.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-5-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
367b9f527b hw/intc/arm_gicv3_kvm: Implement get/put functions
This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Message-id: 1487850673-26455-4-git-send-email-vijay.kilari@gmail.com
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * Add ICC_SRE_EL1 save and restore
 * Dropped translate_fn mechanism and coded functions to handle
   save and restore of edge_trigger and priority
 * Number of APnR register saved/restored based on number of
   priority bits supported]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
6692aac411 hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
To Save and Restore ICC_SRE_EL1 register introduce vmstate
subsection and load only if non-zero.
Also initialize icc_sre_el1 with to 0x7 in pre_load
function.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-3-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Peter Maydell
7d1730b7d9 Merge remote-tracking branch 'remotes/mjt/tags/trivial-patches-fetch' into staging
trivial patches for 2017-02-28

# gpg: Signature made Tue 28 Feb 2017 06:43:55 GMT
# gpg:                using RSA key 0x701B4F6B1A693E59
# gpg: Good signature from "Michael Tokarev <mjt@tls.msk.ru>"
# gpg:                 aka "Michael Tokarev <mjt@corpit.ru>"
# gpg:                 aka "Michael Tokarev <mjt@debian.org>"
# Primary key fingerprint: 6EE1 95D1 886E 8FFB 810D  4324 457C E0A0 8044 65C5
#      Subkey fingerprint: 7B73 BAD6 8BE7 A2C2 8931  4B22 701B 4F6B 1A69 3E59

* remotes/mjt/tags/trivial-patches-fetch:
  syscall: fixed mincore(2) not failing with ENOMEM
  hw/acpi/tco.c: fix tco timer stop
  lm32: milkymist-tmu2: fix a third integer overflow
  qemu-options.hx: add missing id=chr0 chardev argument in vhost-user example
  Update copyright year
  tests/prom-env: Enable the test for the sun4u machine, too
  cadence_gem: Remove unused parameter debug message
  register: fix incorrect read mask
  ide: remove undefined behavior in ide-test
  CODING_STYLE: Mention preferred comment form
  hw/core/register: Mark the device with cannot_instantiate_with_device_add_yet
  hw/core/or-irq: Mark the device with cannot_instantiate_with_device_add_yet
  softfloat: Use correct type in float64_to_uint64_round_to_zero()
  target/s390x: Fix typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:22:41 +00:00
Paolo Bonzini
3a5eb5b4a9 update Linux headers to 4.11
virtio_mmio.h would be deleted; I am leaving it in though it was a
mistake to add it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:18:49 +00:00
Paolo Bonzini
f717e6245f update-linux-headers: update for 4.11
The linux-headers/asm-arm/unistd.h file has been split in three
sub-files, copy them along.  However, building them requires
setting ARCH rather than SRCARCH.

SRCARCH defaults to $(ARCH) anyway; to avoid future occurrence of
the same problem use ARCH for all architectures where SRCARCH=ARCH.
Currently these are all except x86, sparc, sh and tile.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170221122920.16245-2-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:18:49 +00:00
Peter Maydell
8a85e0654e stm32f205: Rename 'nvic' local to 'armv7m'
The local variable 'nvic' in stm32f205_soc_realize() no longer
holds a direct pointer to the NVIC device; it is a pointer to
the ARMv7M container object. Rename it 'armv7m' accordingly.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-12-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
b72e2f6856 stm32f205: Create armv7m object without using armv7m_init()
Switch the stm32f205 SoC to create the armv7m object directly
rather than via the armv7m_init() wrapper. This fits better
with the SoC model's very QOMified design.

In particular this means we can push loading the guest image
out to the top level board code where it belongs, rather
than the SoC object having a QOM property for the filename
to load.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-11-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
ff68dacbc7 armv7m: Split systick out from NVIC
The SysTick timer isn't really part of the NVIC proper;
we just modelled it that way back when we couldn't
easily have devices that only occupied a small chunk
of a memory region. Split it out into its own device.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1487604965-23220-10-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 16:18:49 +00:00
Peter Maydell
743eb70560 armv7m: Don't put core v7M devices under CONFIG_STELLARIS
The NVIC is a core v7M device that exists for all v7M CPUs;
put it under a CONFIG_ARM_V7M rather than hiding it under
CONFIG_STELLARIS.

(We'll use CONFIG_ARM_V7M for the SysTick device too
when we split it out of the NVIC.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-9-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
f68d881c9b armv7m: Make bitband device take the address space to access
Instead of the bitband device doing a cpu_physical_memory_read/write,
make it take a MemoryRegion which specifies where it should be
accessing, and use address_space_read/write to access the
corresponding AddressSpace.

Since this entails pretty much a rewrite, convert away from
old_mmio in the process.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-8-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
98957a94ef armv7m: Make NVIC expose a memory region rather than mapping itself
Make the NVIC device expose a memory region for its users
to map, rather than mapping itself into the system memory
space on realize, and get the one user (the ARMv7M object)
to do this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-7-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
618119c2d3 armv7m: Make ARMv7M object take memory region link
Make the ARMv7M object take a memory region link which it uses
to wire up the bitband rather than having them always put
themselves in the system address space.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-6-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
21e0c38fe2 armv7m: Use QOMified armv7m object in armv7m_init()
Make the legacy armv7m_init() function use the newly QOMified
armv7m object rather than doing everything by hand.

We can return the armv7m object rather than the NVIC from
armv7m_init() because its interface to the rest of the
board (GPIOs, etc) is identical.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-5-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
56b7c66f49 armv7m: QOMify the armv7m container
Create a proper QOM object for the armv7m container, which
holds the CPU, the NVIC and the bitband regions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-4-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
6bf436cf9d armv7m: Move NVICState struct definition into header
Move the NVICState struct definition into a header, so we can
embed it into other QOM objects like SoCs.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-3-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
3651c28569 armv7m: Abstract out the "load kernel" code
Abstract the "load kernel" code out of armv7m_init() into its own
function.  This includes the registration of the CPU reset function,
to parallel how we handle this for A profile cores.

We make the function public so that boards which choose to
directly instantiate an ARMv7M device object can call it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-2-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
1bbe5dc66b Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170228' into staging
target-arm queue:
 * raspi2: implement RNG module
 * raspi2: implement new SD card controller (but don't wire it up)
 * sdhci: bugfixes for block transfers
 * virt: fix cpu object reference leak
 * Add missing fp_access_check() to aarch64 crypto instructions
 * cputlb: Don't assume do_unassigned_access() never returns
 * virt: Add a user option to disallow ITS instantiation
 * i.MX timers: fix reset handling
 * ARMv7M NVIC: rewrite to fix broken priority handling and masking
 * exynos: Fix proper mapping of CPUs by providing real cluster ID
 * exynos: Fix Linux kernel division by zero for PLLs

# gpg: Signature made Tue 28 Feb 2017 12:40:51 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170228: (27 commits)
  hw/arm/exynos: Fix proper mapping of CPUs by providing real cluster ID
  hw/arm/exynos: Fix Linux kernel division by zero for PLLs
  bcm2835_sdhost: add bcm2835 sdhost controller
  armv7m: Allow SHCSR writes to change pending and active bits
  armv7m: Raise correct kind of UsageFault for attempts to execute ARM code
  armv7m: Check exception return consistency
  armv7m: Extract "exception taken" code into functions
  armv7m: VECTCLRACTIVE and VECTRESET are UNPREDICTABLE
  armv7m: Simpler and faster exception start
  armv7m: Remove unused armv7m_nvic_acknowledge_irq() return value
  armv7m: Escalate exceptions to HardFault if necessary
  arm: gic: Remove references to NVIC
  armv7m: Fix condition check for taking exceptions
  armv7m: Rewrite NVIC to not use any GIC code
  armv7m: Implement reading and writing of PRIGROUP
  armv7m: Rename nvic_state to NVICState
  ARM i.MX timers: fix reset handling
  hw/arm/virt: Add a user option to disallow ITS instantiation
  cputlb: Don't assume do_unassigned_access() never returns
  Add missing fp_access_check() to aarch64 crypto instructions
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 14:50:17 +00:00
Peter Maydell
c8c0a1a784 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 28 Feb 2017 04:34:34 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  iscsi: add missing colons to the qapi docs
  block/mirror: fix broken sparseness detection

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 13:41:03 +00:00
Peter Maydell
a57aaa4e74 Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20170228' into staging
Enable MTTCG for Alpha guest

# gpg: Signature made Tue 28 Feb 2017 00:43:17 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-axp-20170228:
  target/alpha: Enable MTTCG by default

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 13:01:50 +00:00
Alex Bennée
1ed9251515 .shippable: add s390x-cross target
Use the new debian-s390x-cross.docker target to cross compile for
s390.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20170227143028.16428-3-alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:31:01 +08:00
Alex Bennée
267004d991 new: dockerfiles/debian-s390-cross
This adds an s390 cross build target to our library of docker setups.
There is an issue with the xfslibs-dev:s390x package having a clash so
we do a || apt-get -f install to fixup the rest of the dependencies.

This doesn't build on the debian.docker file as we are using the
multilib compiler which is only available in stretch (the current
testing repo).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
CC: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20170227143028.16428-2-alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:31:01 +08:00
Krzysztof Kozlowski
f3a6339a5b hw/arm/exynos: Fix proper mapping of CPUs by providing real cluster ID
The Exynos4210 has cluster ID 0x9 in its MPIDR register (raw value
0x8000090x).  If this cluster ID is not provided, then Linux kernel
cannot map DeviceTree nodes to MPIDR values resulting in kernel
warning and lack of any secondary CPUs:

    DT missing boot CPU MPIDR[23:0], fall back to default cpu_logical_map
    ...
    smp: Bringing up secondary CPUs ...
    smp: Brought up 1 node, 1 CPU
    SMP: Total of 1 processors activated (24.00 BogoMIPS).

Provide a cluster ID so Linux will see proper MPIDR and will try to
bring the secondary CPU online.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Message-id: 20170226200142.31169-2-krzk@kernel.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:20 +00:00
Krzysztof Kozlowski
1e0228fd20 hw/arm/exynos: Fix Linux kernel division by zero for PLLs
Without any clock controller, the Linux kernel was hitting division by
zero during boot or with clk_summary:
[    0.000000] [<c031054c>] (unwind_backtrace) from [<c030ba6c>] (show_stack+0x10/0x14)
[    0.000000] [<c030ba6c>] (show_stack) from [<c05b2660>] (dump_stack+0x88/0x9c)
[    0.000000] [<c05b2660>] (dump_stack) from [<c05b11a4>] (Ldiv0+0x8/0x10)
[    0.000000] [<c05b11a4>] (Ldiv0) from [<c06ad1e0>] (samsung_pll45xx_recalc_rate+0x58/0x74)
[    0.000000] [<c06ad1e0>] (samsung_pll45xx_recalc_rate) from [<c0692ec0>] (clk_register+0x39c/0x63c)
[    0.000000] [<c0692ec0>] (clk_register) from [<c125d360>] (samsung_clk_register_pll+0x2e0/0x3d4)
[    0.000000] [<c125d360>] (samsung_clk_register_pll) from [<c125d7e8>] (exynos4_clk_init+0x1b0/0x5e4)
[    0.000000] [<c125d7e8>] (exynos4_clk_init) from [<c12335f4>] (of_clk_init+0x17c/0x210)
[    0.000000] [<c12335f4>] (of_clk_init) from [<c1204700>] (time_init+0x24/0x2c)
[    0.000000] [<c1204700>] (time_init) from [<c1200b2c>] (start_kernel+0x24c/0x38c)
[    0.000000] [<c1200b2c>] (start_kernel) from [<4020807c>] (0x4020807c)

Provide stub for clock controller returning reset values for PLLs.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Message-id: 20170226200142.31169-1-krzk@kernel.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:20 +00:00
Clement Deschamps
43ddc182e2 bcm2835_sdhost: add bcm2835 sdhost controller
This adds the BCM2835 SDHost controller from Arasan.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Message-id: 20170224164021.9066-2-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
5db53e353d armv7m: Allow SHCSR writes to change pending and active bits
Implement the NVIC SHCSR write behaviour which allows pending and
active status of some exceptions to be changed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
e13886e3a7 armv7m: Raise correct kind of UsageFault for attempts to execute ARM code
M profile doesn't implement ARM, and the architecturally required
behaviour for attempts to execute with the Thumb bit clear is to
generate a UsageFault with the CFSR INVSTATE bit set.  We were
incorrectly implementing this as generating an UNDEFINSTR UsageFault;
fix this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
aa488fe3bb armv7m: Check exception return consistency
Implement the exception return consistency checks
described in the v7M pseudocode ExceptionReturn().

Inspired by a patch from Michael Davidsaver's series, but
this is a reimplementation from scratch based on the
ARM ARM pseudocode.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
39ae2474e3 armv7m: Extract "exception taken" code into functions
Extract the code from the tail end of arm_v7m_do_interrupt() which
enters the exception handler into a pair of utility functions
v7m_exception_taken() and v7m_push_stack(), which correspond roughly
to the pseudocode PushStack() and ExceptionTaken().

This also requires us to move the arm_v7m_load_vector() utility
routine up so we can call it.

Handling illegal exception returns has some cases where we want to
take a UsageFault either on an existing stack frame or with a new
stack frame but with a specific LR value, so we want to be able to
call these without having to go via arm_v7m_cpu_do_interrupt().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
14790f730a armv7m: VECTCLRACTIVE and VECTRESET are UNPREDICTABLE
The VECTCLRACTIVE and VECTRESET bits in the AIRCR are both
documented as UNPREDICTABLE if you write a 1 to them when
the processor is not halted in Debug state (ie stopped
and under the control of an external JTAG debugger).
Since we don't implement Debug state or emulated JTAG
these bits are always UNPREDICTABLE for us. Instead of
logging them as unimplemented we can simply log writes
as guest errors and ignore them.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: change extracted from another patch; commit message
 constructed from scratch]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
a25dc805e2 armv7m: Simpler and faster exception start
All the places in armv7m_cpu_do_interrupt() which pend an
exception in the NVIC are doing so for synchronous
exceptions. We know that we will always take some
exception in this case, so we can just acknowledge it
immediately, rather than returning and then immediately
being called again because the NVIC has raised its outbound
IRQ line.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: tweaked commit message; added DEBUG to the set of
exceptions we handle immediately, since it is synchronous
when it results from the BKPT instruction]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Peter Maydell
a5d8235545 armv7m: Remove unused armv7m_nvic_acknowledge_irq() return value
Having armv7m_nvic_acknowledge_irq() return the new value of
env->v7m.exception and its one caller assign the return value
back to env->v7m.exception is pointless. Just make the return
type void instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
a73c98e159 armv7m: Escalate exceptions to HardFault if necessary
The v7M exception architecture requires that if a synchronous
exception cannot be taken immediately (because it is disabled
or at too low a priority) then it should be escalated to
HardFault (and the HardFault exception is then taken).
Implement this escalation logic.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: extracted from another patch]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Michael Davidsaver
7c14b3ac07 arm: gic: Remove references to NVIC
Now that the NVIC is its own separate implementation, we can
clean up the GIC code by removing REV_NVIC and conditionals
which use it.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Peter Maydell
7ecdaa4a96 armv7m: Fix condition check for taking exceptions
The M profile condition for when we can take a pending exception or
interrupt is not the same as that for A/R profile.  The code
originally copied from the A/R profile version of the
cpu_exec_interrupt function only worked by chance for the
very simple case of exceptions being masked by PRIMASK.
Replace it with a call to a function in the NVIC code that
correctly compares the priority of the pending exception
against the current execution priority of the CPU.

[Michael Davidsaver's patchset had a patch to do something
similar but the implementation ended up being a rewrite.]

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Michael Davidsaver
da6d674e50 armv7m: Rewrite NVIC to not use any GIC code
Despite some superficial similarities of register layout, the
M-profile NVIC is really very different from the A-profile GIC.
Our current attempt to reuse the GIC code means that we have
significant bugs in our NVIC.

Implement the NVIC as an entirely separate device, to give
us somewhere we can get the behaviour correct.

This initial commit does not attempt to implement exception
priority escalation, since the GIC-based code didn't either.
It does fix a few bugs in passing:
 * ICSR.RETTOBASE polarity was wrong and didn't account for
   internal exceptions
 * ICSR.VECTPENDING was 16 too high if the pending exception
   was for an external interrupt
 * UsageFault, BusFault and MemFault were not disabled on reset
   as they are supposed to be

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: reworked, various bugs and stylistic cleanups]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Peter Maydell
1004102a77 armv7m: Implement reading and writing of PRIGROUP
Add a state field for the v7M PRIGROUP register and implent
reading and writing it. The current NVIC doesn't honour
the values written, but the new version will.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:16 +00:00
Peter Maydell
f797c07507 armv7m: Rename nvic_state to NVICState
Rename the nvic_state struct to NVICState, to match
our naming conventions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:16 +00:00
Kurban Mallachiev
c98c9eba88 ARM i.MX timers: fix reset handling
The i.MX timer device can be reset by writing to the SWR bit
of the CR register. This has to behave differently from hard
(power-on) reset because it does not reset all of the bits
in the CR register.

We were incorrectly implementing soft reset and hard reset
the same way, and in addition had a logic error which meant
that we were clearing the bits that soft-reset is supposed
to preserve and not touching the bits that soft-reset clears.
This was not correct behaviour for either kind of reset.

Separate out the soft reset and hard reset code paths, and
correct the handling of reset of the CR register so that it
is correct in both cases.

Signed-off-by: Kurban Mallachiev <mallachiev@ispras.ru>
[PMM: rephrased commit message, spacing on operators;
 use bool rather than int for is_soft_reset]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:16 +00:00
Eric Auger
ccc11b0279 hw/arm/virt: Add a user option to disallow ITS instantiation
In 2.9 ITS will block save/restore and migration use cases. As such,
let's introduce a user option that allows to turn its instantiation
off, along with GICv3. With the "its" option turned false, migration
will be possible, obviously at the expense of MSI support (with GICv3).

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487681108-14452-1-git-send-email-eric.auger@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:16 +00:00
Peter Maydell
44d7ce0ef3 cputlb: Don't assume do_unassigned_access() never returns
In get_page_addr_code(), if the guest PC doesn't correspond to RAM
then we currently run the CPU's do_unassigned_access() hook if it has
one, and otherwise we give up and exit QEMU with a more-or-less
useful message.  This code assumes that the do_unassigned_access hook
will never return, because if it does then we'll plough on attempting
to use a non-RAM TLB entry to get a RAM address and will abort() in
qemu_ram_addr_from_host_nofail().  Unfortunately some CPU
implementations of this hook do return: Microblaze, SPARC and the ARM
v7M.

Change the code to call report_bad_exec() if the hook returns, as
well as if it didn't have one.  This means we can tidy it up to use
the cpu_unassigned_access() function which wraps the "get the CPU
class and call the hook if it has one" work, since we aren't trying
to distinguish "no hook" from "hook existed and returned" any more.

This brings the handling of this hook into line with the handling
used for data accesses, where "hook returned" is treated the
same as "no hook existed" and gets you the default behaviour.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-28 12:08:15 +00:00
Nick Reilly
a4f5c5b723 Add missing fp_access_check() to aarch64 crypto instructions
The aarch64 crypto instructions for AES and SHA are missing the
check for if the FPU is enabled.

Signed-off-by: Nick Reilly <nreilly@blackberry.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Igor Mammedov
dbb74759fa hw/arm/virt: fix cpu object reference leak
object_new(FOO) returns an object with ref_cnt == 1
and following
  object_property_set_bool(cpuobj, true, "realized", NULL)
set parent of cpuobj to '/machine/unattached' which makes
ref_cnt == 2.

Since machvirt_init() doesn't take ownership of cpuobj
returned by object_new() it should explicitly drop
reference to cpuobj when dangling pointer is about to
go out of scope like it's done pc_new_cpu() to avoid
object leak.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 1487253461-269218-1-git-send-email-imammedo@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Prasad J Pandit
241999bf4c sd: sdhci: Remove block count enable check in single block transfers
In SDHCI protocol, the 'Block count enable' bit of the Transfer
Mode register is relevant only in multi block transfers. We need
not check it in single block transfers.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-5-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Prasad J Pandit
45ba9f761b sd: sdhci: conditionally invoke multi block transfer
In sdhci_write invoke multi block transfer if it is enabled
in the transfer mode register 's->trnmod'.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-4-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Prasad J Pandit
6e86d90352 sd: sdhci: check transfer mode register in multi block transfer
In the SDHCI protocol, the transfer mode register value
is used during multi block transfer to check if block count
register is enabled and should be updated. Transfer mode
register could be set such that, block count register would
not be updated, thus leading to an infinite loop. Add check
to avoid it.

Reported-by: Wjjzhang <wjjzhang@tencent.com>
Reported-by: Jiang Xin <jiangxin1@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-3-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Prasad J Pandit
8b20aefac4 sd: sdhci: mask transfer mode register value
In SDHCI protocol, the transfer mode register is defined
to be of 6 bits. Mask its value with '0x0037' so that an
invalid value could not be assigned.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 20170214185225.7994-2-ppandit@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Peter Maydell
373442ea3a bcm2835_rng: Use qcrypto_random_bytes() rather than rand()
Switch to using qcrypto_random_bytes() rather than rand() as
our source of randomness for the BCM2835 RNG.

If qcrypto_random_bytes() fails, we don't want to return the guest a
non-random value in case they're really using it for cryptographic
purposes, so the best we can do is a fatal error.  This shouldn't
happen unless something's broken, though.

In theory we could implement this device's full FIFO and interrupt
semantics and then just stop filling the FIFO.  That's a lot of work,
though, and doesn't really give a very nice diagnostic to the user
since the guest will just seem to hang.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-28 12:08:14 +00:00
Marcin Chojnacki
54a5ba13a9 target-arm: Implement BCM2835 hardware RNG
Recent vanilla Raspberry Pi kernels started to make use of
the hardware random number generator in BCM2835 SoC. As a
result, those kernels wouldn't work anymore under QEMU
but rather just freeze during the boot process.

This patch implements a trivial BCM2835 compatible RNG,
and adds it as a peripheral to BCM2835 platform, which
allows to boot a vanilla Raspberry Pi kernel under Qemu.

Changes since v1:
 * Prevented guest from writing [31..20] bits in rng_status
 * Removed redundant minimum_version_id_old
 * Added field entries for the state
 * Changed realize function to reset

Signed-off-by: Marcin Chojnacki <marcinch7@gmail.com>
Message-id: 20170210210857.47893-1-marcinch7@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:13 +00:00
Peter Maydell
105d86ff38 Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-upstream-pull-request' into staging
# gpg: Signature made Mon 27 Feb 2017 22:15:47 GMT
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier2/tags/linux-user-for-upstream-pull-request:
  syscall: fixed mincore(2) not failing with ENOMEM
  linux-user: fix do_rt_sigreturn on m68k linux userspace emulation
  linux-user: correctly manage SR in ucontext
  linux-user: Add signal handling support for x86_64
  linux-user: Add sockopts for IPv6 ping and IPv6 traceroute
  linux-user: fix fork()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:03:36 +00:00
Dr. David Alan Gilbert
665414ad06 postcopy: Add extra check for COPY function
As an extra sanity check, make sure the region we're registering
can perform UFFDIO_COPY;  the COPY will fail later but this
gives a cleaner failure.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-17-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
0c1f4036db postcopy: Add doc about hugepages and postcopy
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-16-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
7e8cafb713 postcopy: Check for userfault+hugepage feature
We need extra Linux kernel support (~4.11) to support userfaults
on hugetlbfs; check for them.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-15-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
61a502128b postcopy: Update userfaultfd.h header
Just the userfaultfd.h update from Paolo's header
update run;

* Drop this patch after Paolo's update goes in *

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170224182844.32452-14-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
433bd0223c postcopy: Allow hugepages
Allow huge pages in postcopy.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-13-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
4c011c37ec postcopy: Send whole huge pages
The RAM save code uses ram_save_host_page to send whole
host pages at a time;  change this to use the host page size associated
with the RAM Block which may be a huge page.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-12-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
332847f075 postcopy: Mask fault addresses to huge page boundary
Currently the fault address received by userfault is rounded to
the host page boundary and a host page is requested from the source.
Use the current RAMBlock page size instead of the general host page
size so that for RAMBlocks backed by huge pages we request the whole
huge page.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-11-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
28abd20014 postcopy: Load huge pages in one go
The existing postcopy RAM load loop already ensures that it
glues together whole host-pages from the target page size chunks sent
over the wire.  Modify the definition of host page that it uses
to be the RAM block page size and thus be huge pages where appropriate.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-10-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
41d84210d4 postcopy: Use temporary for placing zero huge pages
The kernel can't do UFFDIO_ZEROPAGE for huge pages, so we have
to allocate a temporary (always zero) page and use UFFDIO_COPYPAGE
on it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-9-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
df9ff5e1e3 postcopy: Plumb pagesize down into place helpers
Now we deal with normal size pages and huge pages we need
to tell the place handlers the size we're dealing with
and make sure the temporary page is large enough.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-8-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
67f11b5c23 postcopy: Record largest page size
Record the largest page size in use; we'll need it soon for allocating
temporary buffers.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-7-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
e2fa71f527 postcopy: enhance ram_block_discard_range for hugepages
Unfortunately madvise DONTNEED doesn't work on hugepagetlb
so use fallocate(FALLOC_FL_PUNCH_HOLE)
qemu_fd_getpagesize only sets the page based off a file
if the file is from hugetlbfs.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-6-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
d3a5038c46 exec: ram_block_discard_range
Create ram_block_discard_range in exec.c to replace
postcopy_ram_discard_range and most of ram_discard_range.

Those two routines are a bit of a weird combination, and
ram_discard_range is about to get more complex for hugepages.
It's OS dependent code (so shouldn't be in migration/ram.c) but
it needs quite a bit of the innards of RAMBlock so doesn't belong in
the os*.c.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-5-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
29c5917201 postcopy: Chunk discards for hugepages
At the start of the postcopy phase, partially sent huge pages
must be discarded.  The code for dealing with host page sizes larger
than the target page size can be reused for this case.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-4-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
ef08fb389f postcopy: Transmit and compare individual page sizes
When using postcopy with hugepages, we require the source
and destination page sizes for any RAMBlock to match; note
that different RAMBlocks in the same VM can have different
page sizes.

Transmit them as part of the RAM information header and
fail if there's a difference.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170224182844.32452-3-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
e8ca1db29b postcopy: Transmit ram size summary word
Replace the host page-size in the 'advise' command by a pagesize
summary bitmap; if the VM is just using normal RAM then
this will be exactly the same as before, however if they're using
huge pages they'll be different, and thus:
   a) Migration from/to old qemu's that don't understand huge pages
      will fail early.
   b) Migrations with different size RAMBlocks will also fail early.

This catches it very early; earlier than the detailed per-block
check in the next patch.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170224182844.32452-2-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Vladimir Sementsov-Ogievskiy
f9c8caa04f migration: fix use-after-free of to_dst_file
hmp_savevm calls qemu_savevm_state(f), which sets to_dst_file=f in
global migration state. Then hmp_savevm closes f (g_free called).

Next access to to_dst_file in migration state (for example,
qmp_migrate_set_speed) will use it after it was freed.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170225193155.447462-5-vsementsov@virtuozzo.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
5f9412bbac migration: Update docs to discourage version bumps
Version bumps break backwards migration; update the docs
to explain to people that's bad and how to avoid it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170210110359.8210-1-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Marc-André Lureau
128e4e1089 migration: fix id leak regression
This leak was introduced in commit
581f08bac2.

(it stands out quickly with ASAN once the rest of the leaks are also
removed from make check with this series)

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Juan Quintela <quintela@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170221141451.28305-31-marcandre.lureau@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Ashijeet Acharya
7562f90707 migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
Commit a3a3d8c7 introduced a segfault bug while checking for
'dc->vmsd->unmigratable' which caused QEMU to crash when trying to add
devices which do no set their 'dc->vmsd' yet while initialization.
Place a 'dc->vmsd' check prior to it so that we do not segfault for
such devices.

NOTE: This doesn't compromise the functioning of --only-migratable
option as all the unmigratable devices do set their 'dc->vmsd'.

Introduce a new function check_migratable() and move the
only_migratable check inside it, also use stubs to avoid user-mode qemu
build failures.

Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
Message-Id: <1487009088-23891-1-git-send-email-ashijeetacharya@gmail.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Laurent Vivier
9cd49026aa vmstate-static-checker: update white list with spapr_pci
To fix migration between 2.7 and 2.8, some fields have
been renamed and managed with the help of a PHB property
(pre_2_8_migration):

    5c4537b spapr: Fix 2.7<->2.8 migration of PCI host bridge

So we need to add them to the white list:

    dma_liobn[0],
    mem_win_addr, mem_win_size,
    io_win_addr, io_win_size

become

    mig_liobn,
    mig_mem_win_addr, mig_mem_win_size,
    mig_io_win_addr, mig_io_win_size

CC: David Gibson <david@gibson.dropbear.id.au>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Thomas Huth <thuth@redhat.com>
CC: Greg Kurz <groug@kaod.org>
CC: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170214133331.28997-1-lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Halil Pasic
4333309961 tests/test-vmstate.c: test array of ptr to primitive
Let's have a test for ptr arrays to some primitive type with some
not-null and null ptrs intermixed.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Message-Id: <20170222160119.52771-6-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Halil Pasic
cc95883185 tests/test-vmstate.c: test array of ptr with null
Add test for VMSTATE_ARRAY_OF_POINTER_TO_STRUCT with an array
containing some null pointer.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-5-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Fixed type case in assert to uintptr_t rather than uint64_t
2017-02-28 11:30:06 +00:00
Halil Pasic
07d4e69147 migration/vmstate: fix array of ptr with nullptrs
Make VMS_ARRAY_OF_POINTER cope with null pointers. Previously the
reward for trying to migrate an array with some null pointers in it was
an illegal memory access, that is a swift and painless death of the
process.  Let's make vmstate cope with this scenario.

The general approach is, when we encounter a null pointer (element),
instead of following the pointer to save/load the data behind it, we
save/load a placeholder. This way we can detect if we expected a null
pointer at the load side but not null data was saved instead.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Guenther Hutzl <hutzl@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-4-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Halil Pasic
cbfda0e6cf migration/vmstate: split up vmstate_base_addr
Currently vmstate_base_addr does several things: it pinpoints the field
within the struct, possibly allocates memory and possibly does the first
pointer dereference. Obviously allocation is needed only for load.

Let us split up the functionality in vmstate_base_addr and move the
address manipulations (that is everything but the allocation logic) to
load and save so it becomes more obvious what is actually going on. Like
this all the address calculations (and the handling of the flags
controlling these) is in one place and the sequence is more obvious.

The newly introduced function vmstate_handle_alloc also fixes the
allocation for the unused VMS_VBUFFER|VMS_MULTIPLY|VMS_ALLOC scenario
and is substantially simpler than the original vmstate_base_addr.

In load and save some asserts are added so it's easier to debug
situations where we would end up with a null pointer dereference.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-3-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Halil Pasic
e84641f73d migration/vmstate: renames in (load|save)_state
The vmstate_(load|save)_state start out with an a void *opaque pointing
to some struct, and manipulate one or more elements of one field within
that struct.

First the field within the struct is pinpointed as opaque + offset, then
if this is a pointer the pointer is dereferenced to obtain a pointer to
the first element of the vmstate field. Pointers to further elements if
any are calculated as first_element + i * element_size (where i is the
zero based index of the element in question).

Currently base_addr and addr is used as a variable name for the pointer
to the first element and the pointer to the current element being
processed. This is suboptimal because base_addr is somewhat
counter-intuitive (because obtained as base + offset) and both base_addr
and addr not very descriptive (that we have a pointer should be clear
from the fact that it is declared as a pointer).

Let make things easier to understand by renaming base_addr to first_elem
and addr to curr_elem. This has the additional benefit of harmonizing
with other names within the scope (n_elems, vmstate_n_elems).

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-2-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Daniel Henrique Barboza
87c9cc1c30 Changing error message of QMP 'migrate_set_downtime' to seconds
Using QMP, the error message of 'migrate_set_downtime' was displaying
the values in milliseconds, being misleading with the command that
accepts the value in seconds:

{ "execute": "migrate_set_downtime", "arguments": {"value": 3000}}
{"error": {"class": "GenericError", "desc": "Parameter 'downtime_limit'
expects an integer in the range of 0 to 2000000 milliseconds"}}

This message is also seen in HMP when trying to set the same
parameter:

(qemu) migrate_set_parameter downtime-limit 3000000
Parameter 'downtime_limit' expects an integer in the range of 0 to
2000000 milliseconds

To allow for a proper error message when using QMP, a validation
of the user input was added in 'qmp_migrate_set_downtime'.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Message-Id: <20170222151729.5812-1-danielhb@linux.vnet.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Cornelia Huck
e8ebf60f6d pc-bios/s390-ccw.img: rebuild image
Contains the following commits:
- pc-bios/s390-ccw: Use the ccw bios to start the network boot

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
99b72e0fbb pc-bios/s390-ccw: Use the ccw bios to start the network boot
We want to use the ccw bios to start final network boot. To do
this we use ccw bios to detect if the boot device is a virtio
network device and retrieve the start address of the
network boot image.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
f38b5b7fc4 s390x/ipl: Load network boot image
Load the network boot image into guest RAM when the boot
device selected is a network device. Use some of the reserved
space in IplBlockCcw to store the start address of the netboot
image.

A user could also use 'chreipl'(diag 308/5) to change the boot device.
So every time we update the IPLB, we need to verify if the selected
boot device is a network device so we can appropriately load the
network boot image.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
5f31ade055 s390x/ipl: Extend S390IPLState to support network boot
Add new field to S390IPLState to store the name of the network boot
loader.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
34f1b23f8a elf-loader: Allow late loading of elf
The current QEMU ROM infrastructure rejects late loading of ROMs.
And ELFs are currently loaded as ROM, this prevents delayed loading
of ELFs. So when loading ELF, allow the user to specify if ELF should
be loaded as ROM or not.

If an ELF is not loaded as ROM, then they are not restored on a
guest reboot/reset and so its upto the user to handle the reloading.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Greg Kurz
c23d5f1d5b 9pfs: local: drop unused code
Now that the all callbacks have been converted to use "at" syscalls, we
can drop this code.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a565fea565 9pfs: local: open2: don't follow symlinks
The local_open2() callback is vulnerable to symlink attacks because it
calls:

(1) open() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_open2() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively. Since local_open2() already opens
a descriptor to the target file, local_set_cred_passthrough() is
modified to reuse it instead of opening a new one.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to openat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
3f3a16990b 9pfs: local: mkdir: don't follow symlinks
The local_mkdir() callback is vulnerable to symlink attacks because it
calls:

(1) mkdir() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mkdir() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mkdirat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d815e72190 9pfs: local: mknod: don't follow symlinks
The local_mknod() callback is vulnerable to symlink attacks because it
calls:

(1) mknod() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mknod() to rely on opendir_nofollow() and
mknodat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

A new local_set_cred_passthrough() helper based on fchownat() and
fchmodat_nofollow() is introduced as a replacement to
local_post_create_passthrough() to fix (4).

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mknodat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
38771613ea 9pfs: local: symlink: don't follow symlinks
The local_symlink() callback is vulnerable to symlink attacks because it
calls:

(1) symlink() which follows symbolic links for all path elements but the
    rightmost one
(2) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(3) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(4) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_symlink() to rely on opendir_nofollow() and
symlinkat() to fix (1), openat(O_NOFOLLOW) to fix (2), as well as
local_set_xattrat() and local_set_mapped_file_attrat() to fix (3) and
(4) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d369f20763 9pfs: local: chown: don't follow symlinks
The local_chown() callback is vulnerable to symlink attacks because it
calls:

(1) lchown() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_chown() to rely on open_nofollow() and
fchownat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
e3187a45dd 9pfs: local: chmod: don't follow symlinks
The local_chmod() callback is vulnerable to symlink attacks because it
calls:

(1) chmod() which follows symbolic links for all path elements
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

We would need fchmodat() to implement AT_SYMLINK_NOFOLLOW to fix (1). This
isn't the case on linux unfortunately: the kernel doesn't even have a flags
argument to the syscall :-\ It is impossible to fix it in userspace in
a race-free manner. This patch hence converts local_chmod() to rely on
open_nofollow() and fchmod(). This fixes the vulnerability but introduces
a limitation: the target file must readable and/or writable for the call
to openat() to succeed.

It introduces a local_set_xattrat() replacement to local_set_xattr()
based on fsetxattrat() to fix (2), and a local_set_mapped_file_attrat()
replacement to local_set_mapped_file_attr() based on local_fopenat()
and mkdirat() to fix (3). No effort is made to factor out code because
both local_set_xattr() and local_set_mapped_file_attr() will be dropped
when all users have been converted to use the "at" versions.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
ad0b46e6ac 9pfs: local: link: don't follow symlinks
The local_link() callback is vulnerable to symlink attacks because it calls:

(1) link() which follows symbolic links for all path elements but the
    rightmost one
(2) local_create_mapped_attr_dir()->mkdir() which follows symbolic links
    for all path elements but the rightmost one

This patch converts local_link() to rely on opendir_nofollow() and linkat()
to fix (1), mkdirat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
6dd4b1f1d0 9pfs: local: improve error handling in link op
When using the mapped-file security model, we also have to create a link
for the metadata file if it exists. In case of failure, we should rollback.

That's what this patch does.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d2767edec5 9pfs: local: rename: use renameat
The local_rename() callback is vulnerable to symlink attacks because it
uses rename() which follows symbolic links in all path elements but the
rightmost one.

This patch simply transforms local_rename() into a wrapper around
local_renameat() which is symlink-attack safe.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
99f2cf4b2d 9pfs: local: renameat: don't follow symlinks
The local_renameat() callback is currently a wrapper around local_rename()
which is vulnerable to symlink attacks.

This patch rewrites local_renameat() to have its own implementation, based
on local_opendir_nofollow() and renameat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
f9aef99b3e 9pfs: local: lstat: don't follow symlinks
The local_lstat() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) getxattr() which follows symbolic links in all path elements
(3) local_mapped_file_attr()->local_fopen()->openat(O_NOFOLLOW) which
    follows symbolic links in all path elements but the rightmost
    one

This patch converts local_lstat() to rely on opendir_nofollow() and
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1), fgetxattrat_nofollow() to
fix (2).

A new local_fopenat() helper is introduced as a replacement to
local_fopen() to fix (3). No effort is made to factor out code
because local_fopen() will be dropped when all users have been
converted to call local_fopenat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
bec1e9546e 9pfs: local: readlink: don't follow symlinks
The local_readlink() callback is vulnerable to symlink attacks because it
calls:

(1) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(2) readlink() which follows symbolic links for all path elements but the
    rightmost one

This patch converts local_readlink() to rely on open_nofollow() to fix (1)
and opendir_nofollow(), readlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
ac125d993b 9pfs: local: truncate: don't follow symlinks
The local_truncate() callback is vulnerable to symlink attacks because
it calls truncate() which follows symbolic links in all path elements.

This patch converts local_truncate() to rely on open_nofollow() and
ftruncate() instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
31e51d1c15 9pfs: local: statfs: don't follow symlinks
The local_statfs() callback is vulnerable to symlink attacks because it
calls statfs() which follows symbolic links in all path elements.

This patch converts local_statfs() to rely on open_nofollow() and fstatfs()
instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a33eda0dd9 9pfs: local: utimensat: don't follow symlinks
The local_utimensat() callback is vulnerable to symlink attacks because it
calls qemu_utimens()->utimensat(AT_SYMLINK_NOFOLLOW) which follows symbolic
links in all path elements but the rightmost one or qemu_utimens()->utimes()
which follows symbolic links for all path elements.

This patch converts local_utimensat() to rely on opendir_nofollow() and
utimensat(AT_SYMLINK_NOFOLLOW) directly instead of using qemu_utimens().
It is hence assumed that the OS supports utimensat(), i.e. has glibc 2.6
or higher and linux 2.6.22 or higher, which seems reasonable nowadays.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a0e640a872 9pfs: local: remove: don't follow symlinks
The local_remove() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) remove() which follows symbolic links in all path elements but the
    rightmost one

This patch converts local_remove() to rely on opendir_nofollow(),
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1) and unlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
df4938a665 9pfs: local: unlinkat: don't follow symlinks
The local_unlinkat() callback is vulnerable to symlink attacks because it
calls remove() which follows symbolic links in all path elements but the
rightmost one.

This patch converts local_unlinkat() to rely on opendir_nofollow() and
unlinkat() instead.

Most of the code is moved to a separate local_unlinkat_common() helper
which will be reused in a subsequent patch to fix the same issue in
local_remove().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
72f0d0bf51 9pfs: local: lremovexattr: don't follow symlinks
The local_lremovexattr() callback is vulnerable to symlink attacks because
it calls lremovexattr() which follows symbolic links in all path elements
but the rightmost one.

This patch introduces a helper to emulate the non-existing fremovexattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lremovexattr().

local_lremovexattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
3e36aba757 9pfs: local: lsetxattr: don't follow symlinks
The local_lsetxattr() callback is vulnerable to symlink attacks because
it calls lsetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fsetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lsetxattr().

local_lsetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
5507904e36 9pfs: local: llistxattr: don't follow symlinks
The local_llistxattr() callback is vulnerable to symlink attacks because
it calls llistxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing flistxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to llistxattr().

local_llistxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
56ad3e54da 9pfs: local: lgetxattr: don't follow symlinks
The local_lgetxattr() callback is vulnerable to symlink attacks because
it calls lgetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fgetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lgetxattr().

local_lgetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
996a0d76d7 9pfs: local: open/opendir: don't follow symlinks
The local_open() and local_opendir() callbacks are vulnerable to symlink
attacks because they call:

(1) open(O_NOFOLLOW) which follows symbolic links in all path elements but
    the rightmost one
(2) opendir() which follows symbolic links in all path elements

This patch converts both callbacks to use new helpers based on
openat_nofollow() to only open files and directories if they are
below the virtfs shared folder

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
0e35a37829 9pfs: local: keep a file descriptor on the shared folder
This patch opens the shared folder and caches the file descriptor, so that
it can be used to do symlink-safe path walk.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
6482a96163 9pfs: introduce relative_openat_nofollow() helper
When using the passthrough security mode, symbolic links created by the
guest are actual symbolic links on the host file system.

Since the resolution of symbolic links during path walk is supposed to
occur on the client side. The server should hence never receive any path
pointing to an actual symbolic link. This isn't guaranteed by the protocol
though, and malicious code in the guest can trick the server to issue
various syscalls on paths whose one or more elements are symbolic links.
In the case of the "local" backend using the "passthrough" or "none"
security modes, the guest can directly create symbolic links to arbitrary
locations on the host (as per spec). The "mapped-xattr" and "mapped-file"
security modes are also affected to a lesser extent as they require some
help from an external entity to create actual symbolic links on the host,
i.e. another guest using "passthrough" mode for example.

The current code hence relies on O_NOFOLLOW and "l*()" variants of system
calls. Unfortunately, this only applies to the rightmost path component.
A guest could maliciously replace any component in a trusted path with a
symbolic link. This could allow any guest to escape a virtfs shared folder.

This patch introduces a variant of the openat() syscall that successively
opens each path element with O_NOFOLLOW. When passing a file descriptor
pointing to a trusted directory, one is guaranteed to be returned a
file descriptor pointing to a path which is beneath the trusted directory.
This will be used by subsequent patches to implement symlink-safe path walk
for any access to the backend.

Symbolic links aren't the only threats actually: a malicious guest could
change a path element to point to other types of file with undesirable
effects:
- a named pipe or any other thing that would cause openat() to block
- a terminal device which would become QEMU's controlling terminal

These issues can be addressed with O_NONBLOCK and O_NOCTTY.

Two helpers are introduced: one to open intermediate path elements and one
to open the rightmost path element.

Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(renamed openat_nofollow() to relative_openat_nofollow(),
 assert path is relative and doesn't contain '//',
 fixed side-effect in assert, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 11:21:15 +01:00
Greg Kurz
21328e1e57 9pfs: remove side-effects in local_open() and local_opendir()
If these functions fail, they should not change *fs. Let's use local
variables to fix this.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Greg Kurz
00c90bd1c2 9pfs: remove side-effects in local_init()
If this function fails, it should not modify *ctx.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Greg Kurz
56fc494bdc 9pfs: local: move xattr security ops to 9p-xattr.c
These functions are always called indirectly. It really doesn't make sense
for them to sit in a header file.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Pradeep Jagadeesh
a2a7862ca9 throttle: factor out duplicate code
This patch removes the redundant throttle code that was present in
block and fsdev device files. Now the common code is moved
to a single file.

Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
(fix indent nit, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Pradeep Jagadeesh
b8bbdb886e fsdev: add IO throttle support to fsdev devices
This patchset adds the throttle support for the 9p-local driver.
For now this functionality can be enabled only through qemu cli options.
QMP interface and support to other drivers need further extensions.
To make it simple for other 9p drivers, the throttle code has been put in
separate files.

Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
(pass extra NULL CoMutex * argument to qemu_co_queue_wait(),
 added options to qemu-options.hx, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Paolo Bonzini
4bae2b397f 9pfs: fix v9fs_lock error case
In this case, we are marshaling an error status instead of the errno value.
Reorganize the out and out_nofid labels to look like all the other cases.
Coverity reports this because the "err = -ENOENT" and "err = -EINVAL"
assignments above are dead, overwritten by the call to pdu_marshal.

(Coverity issues CID1348512 and CID1348513)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(also open-coded the success path since locking is a nop for us, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Gerd Hoffmann
8779fccbef seabios: update to 1.10.2 release
git shortlog rel-1.10.1..rel-1.10.2
===================================

Ben Warren (5):
      QEMU DMA: Add DMA write capability
      romfile-loader: Switch to using named structs
      QEMU fw_cfg: Add command to write back address of file
      QEMU fw_cfg: Add functions for accessing files by key
      QEMU fw_cfg: Write fw_cfg back on S3 resume

Kevin O'Connor (1):
      ps2port: Disable keyboard/mouse prior to resetting ps2 controller

Ladi Prosek (1):
      ahci: Set upper 32-bit registers to zero

Paul Menzel (1):
      vgasrc: Increase debug level

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-28 09:54:23 +01:00
Peter Maydell
6181478f63 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Mon 27 Feb 2017 16:33:23 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  tests-aio-multithread: use atomic_read properly
  iscsi: do not use aio_context_acquire/release
  nfs: do not use aio_context_acquire/release
  curl: do not use aio_context_acquire/release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 08:46:03 +00:00
Franklin \"Snaipe\" Mathieu
f5507e0448 syscall: fixed mincore(2) not failing with ENOMEM
The current implementation of the mincore(2) syscall sets errno to
EFAULT when the region identified by the first two parameters is
invalid.

This goes against the man page specification, where mincore(2) should
only fail with EFAULT when the third parameter is an invalid address;
and fail with ENOMEM when the checked region does not point to mapped
memory.

Signed-off-by: Franklin "Snaipe" Mathieu <snaipe@diacritic.io>
Cc: Riku Voipio <riku.voipio@linaro.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Igor Pavlikevich
6c608953a5 hw/acpi/tco.c: fix tco timer stop
TCO timer does not actually stop

Signed-off-by: Igor Pavlikevich <ipavlikevich@gmail.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Peter Maydell
3d74ee7dca lm32: milkymist-tmu2: fix a third integer overflow
Don't truncate the multiplication and do a 64 bit one instead
because the result is stored in a 64 bit variable.

This fixes a similar coverity warning to commits 237a8650d6 and
4382fa6554, in a similar way, and is the final third of the fix for
coverity CID 1167561 (hopefully!).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Acked-by: Michael Walle <michael@walle.cc>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Vincenzo Maffione
79cad2faac qemu-options.hx: add missing id=chr0 chardev argument in vhost-user example
In the vhost-user example, a chardev with id chr0 is referenced by the
vhost-user net backend, but the id is not specified in the chardev option.

Signed-off-by: Vincenzo Maffione <v.maffione@gmail.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Marc-André Lureau
d34d5b3bbd Update copyright year
It's still time to wish happy new year!

The Year of the Rooster will begin on January 28, 2017!

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Thomas Huth
6b591ad613 tests/prom-env: Enable the test for the sun4u machine, too
The 32-bit TCG bug has been fixed a while ago, so we can enable
this test for sparc64 now, too. Unfortunately, OpenBIOS does not
work with the sun4v machine anymore (it needs to catch up with the
improved emulation), so we can only enable this test for the sun4u
machine right now.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Fam Zheng
8ea1d05632 cadence_gem: Remove unused parameter debug message
Reported by cppcheck.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Philippe Mathieu-Daudé
4729b3a41d register: fix incorrect read mask
The register_read() and register_write() functions expect a bitmask argument.
To avoid duplicated code, a new inlined function register_enabled_mask() is
introduced.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
John Snow
6048018ef6 ide: remove undefined behavior in ide-test
trivial: initialize the dirty buffer with a random-ish byte.
Stops valgrind from whining about uninitialized buffers.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Peter Maydell
25ac5bbec4 CODING_STYLE: Mention preferred comment form
Our defacto coding style strongly prefers /* */ style comments
over the single-line // style, and checkpatch enforces this,
but we don't actually document this. Mention it in CODING_STYLE.

Suggested-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Thomas Huth
a6e3707ece hw/core/register: Mark the device with cannot_instantiate_with_device_add_yet
The "qemu,register" device needs to be wired up in source code, there
is no way the user can make any real use of this device with the
"-device" parameter or the "device_add" monitor command yet.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Thomas Huth
a70716eb2c hw/core/or-irq: Mark the device with cannot_instantiate_with_device_add_yet
The "or-irq" device needs to be wired up in source code, there is no
way the user can make any real use of this device with the "-device"
parameter or the "device_add" monitor command yet.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Peter Maydell
d000b477f2 softfloat: Use correct type in float64_to_uint64_round_to_zero()
In float64_to_uint64_round_to_zero() a typo meant that we were
taking the uint64_t return value from float64_to_uint64() and
putting it into an int64_t variable before returning it as
uint64_t again. Use uint64_t instead of pointlessly casting it
back and forth to int64_t.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Stefan Weil
8dc52350f9 target/s390x: Fix typo
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Jeff Cody
51654aa52a iscsi: add missing colons to the qapi docs
The missing colons make the iscsi part of the documentation not render
quite as nicely, so add those in.

Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-27 23:33:41 -05:00
Richard Henderson
5ee4f3c2c7 target/alpha: Enable MTTCG by default
Alpha has a weak memory ordering and issues all of the required barriers.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-28 11:41:46 +11:00
Franklin \"Snaipe\" Mathieu
98a3331a55 syscall: fixed mincore(2) not failing with ENOMEM
The current implementation of the mincore(2) syscall sets errno to
EFAULT when the region identified by the first two parameters is
invalid.

This goes against the man page specification, where mincore(2) should
only fail with EFAULT when the third parameter is an invalid address;
and fail with ENOMEM when the checked region does not point to mapped
memory.

Signed-off-by: Franklin "Snaipe" Mathieu <snaipe@diacritic.io>
Cc: Riku Voipio <riku.voipio@linaro.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170217085800.28873-2-snaipe@diacritic.io>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Michael Karcher
59ebb6e451 linux-user: fix do_rt_sigreturn on m68k linux userspace emulation
do_rt_sigreturn uses an uninitialised local variable instead of fetching
the old signal mask directly from the signal frame when restoring the mask,
so the signal mask is undefined after do_rt_sigreturn. As the signal
frame data is in target-endian order, target_to_host_sigset instead of
target_to_host_sigset_internal is required.

do_sigreturn is correct in using target_to_host_sigset_internal, because
get_user already did the endianness conversion.

Signed-off-by: Michael Karcher <karcher@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170225110517.2832-3-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Laurent Vivier
3219de458c linux-user: correctly manage SR in ucontext
Use cpu_m68k_get_ccr()/cpu_m68k_set_ccr() to setup and restore correctly
the value of SR in the ucontext structure

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170225110517.2832-2-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Pranith Kumar
1c1df0198b linux-user: Add signal handling support for x86_64
Note that x86_64 has only _rt signal handlers. This implementation
attempts to share code with the x86_32 implementation.

CC: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Allan Wirth <awirth@akamai.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170226165345.8757-1-bobby.prani@gmail.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Helge Deller
ee1ac3a182 linux-user: Add sockopts for IPv6 ping and IPv6 traceroute
Add the neccessary sockopts for ping and traceroute on IPv6.

This fixes the following qemu warnings with IPv6:
Unsupported ancillary data: 0/2
Unsupported ancillary data: 0/11
Unsupported ancillary data: 41/25
Unsupported setsockopt level=0 optname=12
Unsupported setsockopt level=41 optname=16
Unsupported setsockopt level=41 optname=25
Unsupported setsockopt level=41 optname=50
Unsupported setsockopt level=41 optname=51
Unsupported setsockopt level=41 optname=8
Unsupported setsockopt level=58 optname=1

Tested with hppa-linux-user (big-endian) on x86_64 (little-endian).

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170218223130.GA25278@ls3530.fritz.box>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Laurent Vivier
7eddb5ddac linux-user: fix fork()
Since commit 5ea2fc8 ("linux-user: Sanity check clone flags"),
trying to run fork() fails with old distro on some architectures.

This is the case with HP-PA and Debian 5 (Lenny).

It fails on:

         if ((flags & CSIGNAL) != TARGET_SIGCHLD) {
             return -TARGET_EINVAL;
         }

because flags is 17, whereas on HP-PA, SIGCHLD is 18.
17 is the SIGCHLD value of my host (x86_64).

It appears that for TARGET_NR_fork and TARGET_NR_vfork, QEMU calls
do_fork() with SIGCHLD instead of TARGET_SIGCHLD.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170216173707.16209-1-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Peter Maydell
9b9fbe8a4e Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170227-1' into staging
gtk: fix kbd on xwayland
vnc: fix double free issues
opengl improvements

# gpg: Signature made Mon 27 Feb 2017 16:11:30 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170227-1:
  vnc: fix double free issues
  spice: add display & head options
  ui: Use XkbGetMap and XkbGetNames instead of XkbGetKeyboard
  gtk-egl: add scanout_disable support
  sdl2: add scanout_disable support
  spice: add scanout_disable support
  virtio-gpu: use dpy_gl_scanout_disable
  console: add dpy_gl_scanout_disable
  console: rename dpy_gl_scanout to dpy_gl_scanout_texture

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 19:19:46 +00:00
John Snow
39c11580f3 block/mirror: fix broken sparseness detection
int64_t is in all likelihood the actual scalar type we want.
Yep, really.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1219541

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-27 14:02:31 -05:00
Eduardo Habkost
b8097deb35 i386: Improve query-cpu-model-expansion full mode
This keeps the same results on type=static expansion, but make
type=full expansion return every single QOM property on the CPU
object that have a different value from the "base' CPU model,
plus all the CPU feature flag properties.

Cc: Jiri Denemark <jdenemar@redhat.com>
Message-Id: <20170222190029.17243-4-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:35 -03:00
Eduardo Habkost
f99fd7ca2a i386: Implement query-cpu-model-expansion QMP command
Implement query-cpu-model-expansion for target-i386.

This should meet all the requirements while being simple. In the
case of static expansion, it will use the new "base" CPU model,
and in the case of full expansion, it will keep the original CPU
model name+props, and append extra properties.

A future follow-up should improve the implementation of
type=full, so that it returns more detailed data, including every
writable QOM property in the CPU object.

Cc: libvir-list@redhat.com
Cc: Jiri Denemark <jdenemar@redhat.com>
Message-Id: <20170222190029.17243-3-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:31 -03:00
Eduardo Habkost
5adbed3088 i386: Define static "base" CPU model
The query-cpu-model-expand QMP command needs at least one static
model, to allow the "static" expansion mode to be implemented.
Instead of defining static versions of every CPU model, define a
"base" CPU model that has absolutely no feature flag enabled.

Despite having no CPUID data set at all, "-cpu base" is even a
functional CPU:

* It can boot a Slackware Linux 1.01 image with a Linux 0.99.12
  kernel[1].
* It is even possible to boot[2] a modern Fedora x86_64 guest by
  manually enabling the following CPU features:
  -cpu base,+lm,+msr,+pae,+fpu,+cx8,+cmov,+sse,+sse2,+fxsr

[1] http://www.qemu-advent-calendar.org/2014/#day-1
[2] This is what can be seen in the guest:
    [root@localhost ~]# cat /proc/cpuinfo
    processor       : 0
    vendor_id       : unknown
    cpu family      : 0
    model           : 0
    model name      : 00/00
    stepping        : 0
    physical id     : 0
    siblings        : 1
    core id         : 0
    cpu cores       : 1
    apicid          : 0
    initial apicid  : 0
    fpu             : yes
    fpu_exception   : yes
    cpuid level     : 1
    wp              : yes
    flags           : fpu msr pae cx8 cmov fxsr sse sse2 lm nopl
    bugs            :
    bogomips        : 5832.70
    clflush size    : 64
    cache_alignment : 64
    address sizes   : 36 bits physical, 48 bits virtual
    power management:

    [root@localhost ~]# x86info -v -a
    x86info v1.30.  Dave Jones 2001-2011
    Feedback to <davej@redhat.com>.

    No TSC, MHz calculation cannot be performed.
    Unknown vendor (0)
    MP Table:

    Family: 0 Model: 0 Stepping: 0
    CPU Model (x86info's best guess):

    eax in: 0x00000000, eax = 00000001 ebx = 00000000 ecx = 00000000 edx = 00000000
    eax in: 0x00000001, eax = 00000000 ebx = 00000800 ecx = 00000000 edx = 07008161

    eax in: 0x80000000, eax = 80000001 ebx = 00000000 ecx = 00000000 edx = 00000000
    eax in: 0x80000001, eax = 00000000 ebx = 00000000 ecx = 00000000 edx = 20000000

    Feature flags:
     fpu            Onboard FPU
     msr            Model-Specific Registers
     pae            Physical Address Extensions
     cx8            CMPXCHG8 instruction
     cmov           CMOV instruction
     fxsr           FXSAVE and FXRSTOR instructions
     sse            SSE support
     sse2           SSE2 support

    Long NOPs supported: yes

    Address sizes : 0 bits physical, 0 bits virtual
    0MHz processor (estimate).

     running at an estimated 0MHz
    [root@localhost ~]#

Message-Id: <20170222190029.17243-2-ehabkost@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:27 -03:00
Eduardo Habkost
0bacd8b304 i386: Don't set CPUClass::cpu_def on "max" model
Host CPUID info is used by the "max" CPU model only in KVM mode.
Move the initialization of CPUID data for "max" from class_init
to instance_init, and don't set CPUClass::cpu_def for "max".

Message-Id: <20170222183919.11928-4-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:25 -03:00
Eduardo Habkost
6900d1cc8a i386: Make "max" model not use any host CPUID info on TCG
Instead of reporting host CPUID data on "max", use the qemu64 CPU
model as reference to initialize CPUID
vendor/family/model/stepping/model-id.

Message-Id: <20170222183919.11928-3-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:23 -03:00
Eduardo Habkost
c62f2630f8 i386: Create "max" CPU model
Rename the existing "host" CPU model to "max, and set it to
kvm_enabled=false. The new "max" CPU model will be able to enable
all features supported by TCG out of the box, because its logic
is based on x86_cpu_get_supported_feature_word(), which already
works with TCG.

A new KVM-specific "host" class was added, that simply inherits
everything from "max" except the 'ordering' and 'description'
fields.

Message-Id: <20170222183919.11928-2-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:21 -03:00
Eduardo Habkost
a357a65b66 qapi-schema: Comment about full expansion of non-migration-safe models
Add a note warning that static expansion may not be 100% accurate
when the CPU model is not migration-safe. This will be the case
on x86 when expansing the "host" CPU model, because there are
"host" features that can't have a migration-safe representation
(e.g. "host-cache-info").

Message-Id: <20170116211124.29245-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:13 -03:00
Eduardo Habkost
b8d834a00f i386: Reorganize and document CPUID initialization steps
CPU runnability checks and CPU model expansion have slightly
different requirements. Document the steps involved in loading a
CPU model and realizing a CPU, so their requirements and purpose
are clearly defined.

This patch doesn't change any implementation. It just add
comments, rename the x86_cpu_load_features() function for clarity
(so it won't be confused with x86_cpu_load_def()), and move
x86_cpu_filter_features() closer to it.

Message-Id: <20170116211124.29245-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:09 -03:00
Eduardo Habkost
44bd8e5306 i386: Rename X86CPU::host_features to X86CPU::max_features
Rename the field and add a small comment to make its purpose
clearer.

Message-Id: <20170119210449.11991-4-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:06 -03:00
Eduardo Habkost
f48c883703 i386: Add ordering field to CPUClass
Instead of using kvm_enabled to order the "-cpu help" list, use a
new "ordering" field for that.

Message-Id: <20170119210449.11991-3-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:03 -03:00
Eduardo Habkost
771a13e90d i386: Unset cannot_destroy_with_object_finalize_yet on "host" model
The class is now safe because the assert(kvm_enabled()) line was
removed by commit e435601058.

Message-Id: <20170119210449.11991-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:22:58 -03:00
Peter Maydell
8f2d7c3411 Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2017-02-27-1' into staging
Merge qcrypto 2017/02/27 v1

# gpg: Signature made Mon 27 Feb 2017 13:37:34 GMT
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2017-02-27-1:
  crypto: assert cipher algorithm is always valid
  crypto: fix leak in ivgen essiv init

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 15:33:21 +00:00
Gerd Hoffmann
2dc120beb8 vnc: fix double free issues
Reported by Coverity: CID 1371242, 1371243, 1371244.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487682332-29154-1-git-send-email-kraxel@redhat.com
2017-02-27 16:22:01 +01:00
Gerd Hoffmann
8bf69b499a spice: add display & head options
This allows to specify display and head to use, simliar to vnc.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1487663858-11731-1-git-send-email-kraxel@redhat.com
2017-02-27 16:21:23 +01:00
Daniel P. Berrange
857e479552 ui: Use XkbGetMap and XkbGetNames instead of XkbGetKeyboard
XkbGetKeyboard does not work in XWayland and even on non-Wayland
X11 servers its use is discouraged:

  https://bugs.freedesktop.org/show_bug.cgi?id=89240

This resolves a problem whereby QEMU prints

  "could not lookup keycode name"

on startup when running under XWayland. Keymap handling is
however still broken after this commit, since Xwayland is
reporting a keymap we can't handle

  "unknown keycodes `(unnamed)', please report to qemu-devel@nongnu.org"

NB, native Wayland support (which is the default under GTK3) is
not affected - only XWayland (which can be requested with GDK_BACKEND
on GTK3, and is the only option for GTK2).

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170227132343.30824-1-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-27 16:19:47 +01:00
Gerd Hoffmann
543a7a161f gtk-egl: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-7-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
db6cdfbeba sdl2: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-6-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
46ffd0c031 spice: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-5-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
975896fc88 virtio-gpu: use dpy_gl_scanout_disable
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-4-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
eaa92c76ce console: add dpy_gl_scanout_disable
Helper function (and DisplayChangeListenerOps ptr) to disable scanouts.
Replaces using dpy_gl_scanout_texture with 0x0 size and no texture
specified.

Allows cleanups to make the io and gfx emulation code more readable.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-3-git-send-email-kraxel@redhat.com
2017-02-27 16:15:28 +01:00
Gerd Hoffmann
f4c36bdab6 console: rename dpy_gl_scanout to dpy_gl_scanout_texture
We'll add a variant which accepts dmabufs soon.  Change
the name so we can easily disturgish the two variants.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-2-git-send-email-kraxel@redhat.com
2017-02-27 16:15:28 +01:00
Paolo Bonzini
1ab17f9f5c tests-aio-multithread: use atomic_read properly
nodes[id].next is written by other threads.  If atomic_read is not used
(matching atomic_set in mcs_mutex_lock!) the compiler can optimize the
whole "if" away!

Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Greg Kurz <groug@kaod.org>
Message-id: 20170227111726.9237-1-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 14:00:53 +00:00
Paolo Bonzini
d045c466d9 iscsi: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libiscsi calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:58:58 +00:00
Paolo Bonzini
37d1e4d9bf nfs: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libnfs calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:58:53 +00:00
Prasad J Pandit
32c813e6c2 crypto: assert cipher algorithm is always valid
Crypto routines 'qcrypto_cipher_get_block_len' and
'qcrypto_cipher_get_key_len' return non-zero cipher block and key
lengths from static arrays 'alg_block_len[]' and 'alg_key_len[]'
respectively. Returning 'zero(0)' value from either of them would
likely lead to an error condition.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-27 13:37:14 +00:00
Li Qiang
0072d2a9fc crypto: fix leak in ivgen essiv init
On error path, the 'salt' doesn't been freed thus leading
a memory leak. This patch avoid this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-27 13:37:14 +00:00
Paolo Bonzini
ba3186c4e4 curl: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect BDRVCURLState access with a QemuMutex.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:33:24 +00:00
Paolo Bonzini
3b1d816984 tests-aio-multithread: use atomic_read properly
nodes[id].next is written by other threads.  If atomic_read is not used
(matching atomic_set in mcs_mutex_lock!) the compiler can optimize the
whole "if" away!

Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Greg Kurz <groug@kaod.org>
Message-id: 20170227111726.9237-1-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 12:54:08 +00:00
Peter Maydell
d992f2f136 Merge remote-tracking branch 'remotes/artyom/tags/pull-sun4v-20170226' into staging
Pull request for Niagara patches 2017 02 26

# gpg: Signature made Sun 26 Feb 2017 21:56:06 GMT
# gpg:                using RSA key 0x3360C3F7411A125F
# gpg: Good signature from "Artyom Tarasenko <atar4qemu@gmail.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2AD8 6149 17F4 B2D7 05C0  BB12 3360 C3F7 411A 125F

* remotes/artyom/tags/pull-sun4v-20170226:
  niagara: check if a serial port is available
  niagara: fail if a firmware file is missing

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 22:40:23 +00:00
Artyom Tarasenko
a5a08302d4 niagara: check if a serial port is available
Reported-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Artyom Tarasenko <atar4qemu@gmail.com>
2017-02-26 22:46:08 +01:00
Artyom Tarasenko
5e3a549498 niagara: fail if a firmware file is missing
fail if a firmware file is missing and not qtest_enabled(),
the later is necessary to allow some basic tests if
firmware is not available

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Artyom Tarasenko <atar4qemu@gmail.com>
2017-02-26 22:44:25 +01:00
Peter Maydell
685783c5b6 Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging
slirp updates

# gpg: Signature made Sun 26 Feb 2017 14:40:00 GMT
# gpg:                using RSA key 0xB0A51BF58C9179C5
# gpg: Good signature from "Samuel Thibault <samuel.thibault@aquilenet.fr>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@u-bordeaux.fr>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: AEBF 7448 FAB9 453A 4552  390E B0A5 1BF5 8C91 79C5

* remotes/thibault/tags/samuel-thibault:
  slirp: tcp_listen(): Don't try to close() an fd we never opened
  slirp: Convert mbufs to use g_malloc() and g_free()
  slirp: Check qemu_socket() return value in udp_listen()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 16:38:40 +00:00
Peter Maydell
bd5d2353aa slirp: tcp_listen(): Don't try to close() an fd we never opened
Coverity points out (CID 1005725) that an error-exit path in tcp_listen()
will try to close(s) even if the reason it got there was that the
qemu_socket() failed and s was never opened.  Not only that, this isn't even
the right function to use, because we need closesocket() to do the right
thing on Windows.  Change to using the right function and only calling it if
needed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:39:29 +01:00
Peter Maydell
70f2e64e4d slirp: Convert mbufs to use g_malloc() and g_free()
The mbuf code currently doesn't check the result of doing a malloc()
or realloc() of its data (spotted by Coverity, CID 1238946).
Since the m_inc() API assumes that extending an mbuf must succeed,
just convert to g_malloc() and g_free().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:39:05 +01:00
Peter Maydell
4577b09a27 slirp: Check qemu_socket() return value in udp_listen()
Check the return value from qemu_socket() rather than trying to
pass it to bind() as an fd argument even if it's negative.
This wouldn't have caused any negative consequences, because
it won't be a valid fd number and the bind call will fail;
but Coverity complains (CID 1005723).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:38:38 +01:00
Peter Maydell
6b4e463ff3 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Fri 24 Feb 2017 18:08:26 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  tests: Use opened block node for block job tests
  vvfat: Use opened node as backing file
  block: Add bdrv_new_open_driver()
  block: Factor out bdrv_open_driver()
  block: Use BlockBackend for image probing
  block: Factor out bdrv_open_child_bs()
  block: Attach bs->file only during .bdrv_open()
  block: Pass BdrvChild to bdrv_truncate()
  mirror: Resize active commit base in mirror_run()
  qcow2: Use BB for resizing in qcow2_amend_options()
  blockdev: Use BlockBackend to resize in qmp_block_resize()
  iotests: Fix another race in 030
  qemu-img: Improve documentation for PREALLOC_MODE_FALLOC
  qemu-img: Truncate before full preallocation
  qemu-img: Add tests for raw image preallocation
  qemu-img: Do not truncate before preallocation
  qemu-iotests: redirect nbd server stdout to /dev/null
  qemu-iotests: add ability to exclude certain protocols from tests
  qemu-iotests: Test 137 only supports 'file' protocol

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 12:26:37 +00:00
Peter Maydell
6528a4c1f2 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Fri 24 Feb 2017 17:45:53 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  RBD: Add support readv,writev for rbd
  block/nfs: try to avoid the bounce buffer in pwritev
  block/nfs: convert to preadv / pwritev

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 11:47:00 +00:00
Peter Maydell
6d3f4c6d1d Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170224-2' into staging
MIPS patches 2017-02-24-2

CHanges:
* Add the Boston board with fixing the make check issue on 32-bit hosts.

# gpg: Signature made Fri 24 Feb 2017 11:43:45 GMT
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170224-2:
  hw/mips: MIPS Boston board support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 21:15:14 +00:00
Peter Maydell
28f997a82c Merge remote-tracking branch 'remotes/stsquad/tags/pull-mttcg-240217-1' into staging
This is the MTTCG pull-request as posted yesterday.

# gpg: Signature made Fri 24 Feb 2017 11:17:51 GMT
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-mttcg-240217-1: (24 commits)
  tcg: enable MTTCG by default for ARM on x86 hosts
  hw/misc/imx6_src: defer clearing of SRC_SCR reset bits
  target-arm: ensure all cross vCPUs TLB flushes complete
  target-arm: don't generate WFE/YIELD calls for MTTCG
  target-arm/powerctl: defer cpu reset work to CPU context
  cputlb: introduce tlb_flush_*_all_cpus[_synced]
  cputlb: atomically update tlb fields used by tlb_reset_dirty
  cputlb: add tlb_flush_by_mmuidx async routines
  cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap
  cputlb: introduce tlb_flush_* async work.
  cputlb: tweak qemu_ram_addr_from_host_nofail reporting
  cputlb: add assert_cpu_is_self checks
  tcg: handle EXCP_ATOMIC exception for system emulation
  tcg: enable thread-per-vCPU
  tcg: enable tb_lock() for SoftMMU
  tcg: remove global exit_request
  tcg: drop global lock during TCG code execution
  tcg: rename tcg_current_cpu to tcg_current_rr_cpu
  tcg: add kick timer for single-threaded vCPU emulation
  tcg: add options for enabling MTTCG
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 18:43:52 +00:00
Peter Maydell
2421f381dc Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20170224' into staging
A selection of s390x patches:
- cleanups, fixes and improvements
- program check loop detection (useful with the corresponding kernel
  patch)
- wire up virtio-crypto for ccw
- and finally support many virtqueues for virtio-ccw

# gpg: Signature made Fri 24 Feb 2017 09:19:19 GMT
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20170224:
  s390x/css: handle format-0 TIC CCW correctly
  s390x/arch_dump: pass cpuid into notes sections
  s390x/arch_dump: use proper note name and note size
  virtio-ccw: support VIRTIO_QUEUE_MAX virtqueues
  s390x: bump ADAPTER_ROUTES_MAX_GSI
  virtio-ccw: check flic->adapter_routes_max_batch
  s390x: add property adapter_routes_max_batch
  virtio-ccw: Check the number of vqs in CCW_CMD_SET_IND
  virtio-ccw: add virtio-crypto-ccw device
  virtio-ccw: handle virtio 1 only devices
  s390x/flic: fail migration on source already
  s390x/kvm: detect some program check loops
  s390x/s390-virtio: get rid of DPRINTF

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 17:48:49 +00:00
Peter Maydell
f62ab6bb8f Merge remote-tracking branch 'remotes/famz/tags/for-upstream' into staging
Docker testing and shippable patches

Hi Peter,

These are testing and build automation patches:

- Shippable.com powered CI config
- Docker cross build
- Fixes and MAINTAINERS tweaks.

# gpg: Signature made Fri 24 Feb 2017 06:31:10 GMT
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/for-upstream:
  docker: Install python2 explicitly in docker image
  MAINTAINERS: merge Build and test automation with Docker tests
  .shippable.yml: new CI provider
  new: debian docker targets for cross-compiling
  tests/docker: add basic user mapping support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 16:37:32 +00:00
Peter Maydell
d7941f4eed Merge remote-tracking branch 'remotes/armbru/tags/pull-util-2017-02-23' into staging
option cutils: Fix and clean up number conversions

# gpg: Signature made Thu 23 Feb 2017 19:41:17 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-util-2017-02-23: (24 commits)
  option: Fix checking of sizes for overflow and trailing crap
  util/cutils: Change qemu_strtosz*() from int64_t to uint64_t
  util/cutils: Return qemu_strtosz*() error and value separately
  util/cutils: Let qemu_strtosz*() optionally reject trailing crap
  qemu-img: Wrap cvtnum() around qemu_strtosz()
  test-cutils: Drop suffix from test_qemu_strtosz_simple()
  test-cutils: Use qemu_strtosz() more often
  util/cutils: Drop QEMU_STRTOSZ_DEFSUFFIX_* macros
  util/cutils: New qemu_strtosz()
  util/cutils: Rename qemu_strtosz() to qemu_strtosz_MiB()
  util/cutils: New qemu_strtosz_metric()
  test-cutils: Cover qemu_strtosz() around range limits
  test-cutils: Cover qemu_strtosz() with trailing crap
  test-cutils: Cover qemu_strtosz() invalid input
  test-cutils: Add missing qemu_strtosz()... endptr checks
  option: Fix to reject invalid and overflowing numbers
  util/cutils: Clean up control flow around qemu_strtol() a bit
  util/cutils: Clean up variable names around qemu_strtol()
  util/cutils: Rename qemu_strtoll(), qemu_strtoull()
  util/cutils: Rewrite documentation of qemu_strtol() & friends
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 18:34:27 +00:00
tianqing
1d393bdeae RBD: Add support readv,writev for rbd
Rbd can do readv and writev directly, so wo do not need to transform
iov to buf or vice versa any more.

Signed-off-by: tianqing <tianqing@unitedstack.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:43:01 -05:00
Peter Lieven
ef503a8417 block/nfs: try to avoid the bounce buffer in pwritev
if the passed qiov contains exactly one iov we can
pass the buffer directly.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-3-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:38:35 -05:00
Peter Lieven
69785a229d block/nfs: convert to preadv / pwritev
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-2-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:38:35 -05:00
Peter Maydell
6959e4523e Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170223.0' into staging
VFIO updates 2017-02-23

 - Report qdev_unplug errors (Alex Williamson)
 - Fix ecap ID 0 handling, improve comment (Alex Williamson)
 - Disable IGD stolen memory in UPT mode too (Xiong Zhang)

# gpg: Signature made Thu 23 Feb 2017 19:04:17 GMT
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-updates-20170223.0:
  vfio/pci-quirks.c: Disable stolen memory for igd VFIO
  vfio/pci: Improve extended capability comments, skip masked caps
  vfio/pci: Report errors from qdev_unplug() via device request

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 17:27:59 +00:00
Kevin Wolf
d185cf0ec6 tests: Use opened block node for block job tests
blk_insert_bs() and block job related functions will soon require an
opened block node (permission calculations will involve the block
driver), so let our tests be consistent with the real users in this
respect.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
a8a4d15c1c vvfat: Use opened node as backing file
We should not try to assign a not yet opened node as the backing file,
because as soon as the permission system is added it will fail.  The
just added bdrv_new_open_driver() function is the right tool to open a
file with an internal driver, use it.

In case anyone wonders whether that magic fake backing file to trigger a
special action on 'commit' actually works today: No, not for me. One
reason is that we've been adding a raw format driver on top for several
years now and raw doesn't support commit. Other reasons include that the
backing file isn't writable and the driver doesn't support reopen, and
it's also size 0 and the driver doesn't support bdrv_truncate. All of
these are easily fixable, but then 'commit' ended up in an infinite loop
deep in the vvfat code for me, so I thought I'd best leave it alone. I'm
not really sure what it was supposed to do anyway.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
680c7f9606 block: Add bdrv_new_open_driver()
This function allows to create more or less normal BlockDriverStates
even for BlockDrivers that aren't globally registered (e.g. helper
filters for block jobs).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
01a5650179 block: Factor out bdrv_open_driver()
This is a function that doesn't do any option parsing, but just does
some basic BlockDriverState setup and calls the .bdrv_open() function of
the block driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
5696c6e350 block: Use BlockBackend for image probing
This fixes the use of a parent-less BdrvChild in bdrv_open_inherit() by
converting it into a BlockBackend. Which is exactly what it should be,
image probing is an external, standalone user of a node. The requests
can't be considered to originate from the format driver node because
that one isn't even opened yet.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
2d6b86af14 block: Factor out bdrv_open_child_bs()
This is the part of bdrv_open_child() that opens a BDS with option
inheritance, but doesn't attach it as a child to the parent yet.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
4e4bf5c42c block: Attach bs->file only during .bdrv_open()
The way that attaching bs->file worked was a bit unusual in that it was
the only child that would be attached to a node which is not opened yet.
Because of this, the block layer couldn't know yet which permissions the
driver would eventually need.

This patch moves the point where bs->file is attached to the beginning
of the individual .bdrv_open() implementations, so drivers already know
what they are going to do with the child. This is also more consistent
with how driver-specific children work.

For a moment, bdrv_open() gets its own BdrvChild to perform image
probing, but instead of directly assigning this BdrvChild to the BDS, it
becomes a temporary one and the node name is passed as an option to the
drivers, so that they can simply use bdrv_open_child() to create another
reference for their own use.

This duplicated child for (the not opened yet) bs is not the final
state, a follow-up patch will change the image probing code to use a
BlockBackend, which is completely independent of bs.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
52cdbc5869 block: Pass BdrvChild to bdrv_truncate()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
becc347e1c mirror: Resize active commit base in mirror_run()
This is more consistent with the commit block job, and it moves the code
to a place where we already have the necessary BlockBackends to resize
the base image when bdrv_truncate() is changed to require a BdrvChild.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
70b27f3643 qcow2: Use BB for resizing in qcow2_amend_options()
In order to able to convert bdrv_truncate() to take a BdrvChild and
later to correctly check the resize permission here, we need to use a
BlockBackend for resizing the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
7dad9ee646 blockdev: Use BlockBackend to resize in qmp_block_resize()
In order to be able to do permission checking and to keep working with
the BdrvChild based bdrv_truncate() that this involves, we need to
create a temporary BlockBackend to resize the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
John Snow
2c3b44da07 iotests: Fix another race in 030
We can't rely on a non-paused job to be present and running for us.
Assume that if the job is not present that it completed already.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
c6ccc2c5e6 qemu-img: Improve documentation for PREALLOC_MODE_FALLOC
Now that we are truncating the file in both PREALLOC_MODE_FULL and
PREALLOC_MODE_OFF, not truncating in PREALLOC_MODE_FALLOC looks odd.
Add a comment explaining why we do not truncate in this case.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
5a1dad9d5a qemu-img: Truncate before full preallocation
In a previous commit (qemu-img: Do not truncate before preallocation) we
moved truncate to the PREALLOC_MODE_OFF branch to avoid slowdown in
posix_fallocate().

However this change is not optimal when using PREALLOC_MODE_FULL, since
knowing the final size from the beginning could allow the file system
driver to do less allocations and possibly avoid fragmentation of the
file.

Now we truncate also before doing full preallocation.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
6f993f3fca qemu-img: Add tests for raw image preallocation
Add tests for creating raw image with and without the preallocation
option.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Nir Soffer
f6a7240442 qemu-img: Do not truncate before preallocation
When using file system that does not support fallocate() (e.g. NFS <
4.2), truncating the file only when preallocation=OFF speeds up creating
raw file.

Here is example run, tested on Fedora 24 machine, creating raw file on
NFS version 3 server.

$ time ./qemu-img-master create -f raw -o preallocation=falloc mnt/test 1g
Formatting 'mnt/test', fmt=raw size=1073741824 preallocation=falloc

real	0m21.185s
user	0m0.022s
sys	0m0.574s

$ time ./qemu-img-fix create -f raw -o preallocation=falloc mnt/test 1g
Formatting 'mnt/test', fmt=raw size=1073741824 preallocation=falloc

real	0m11.601s
user	0m0.016s
sys	0m0.525s

$ time dd if=/dev/zero of=mnt/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 15.6627 s, 68.6 MB/s

real	0m16.104s
user	0m0.009s
sys	0m0.220s

Running with strace we can see that without this change we do one
pread() and one pwrite() for each block. With this change, we do only
one pwrite() per block.

$ strace ./qemu-img-master create -f raw -o preallocation=falloc mnt/test 8192
...
pread64(9, "\0", 1, 4095)               = 1
pwrite64(9, "\0", 1, 4095)              = 1
pread64(9, "\0", 1, 8191)               = 1
pwrite64(9, "\0", 1, 8191)              = 1

$ strace ./qemu-img-fix create -f raw -o preallocation=falloc mnt/test 8192
...
pwrite64(9, "\0", 1, 4095)              = 1
pwrite64(9, "\0", 1, 8191)              = 1

This happens because posix_fallocate is checking if each block is
allocated before writing a byte to the block, and when truncating the
file before preallocation, all blocks are unallocated.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
43421ea05f qemu-iotests: redirect nbd server stdout to /dev/null
Some iotests (e.g. 174) try to filter the output of _make_test_image by
piping the stdout.  Pipe the server stdout to /dev/null, so that filter
pipe does not need to wait until process completion.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
dfac03dcd4 qemu-iotests: add ability to exclude certain protocols from tests
Add the ability for shell script tests to exclude specific
protocols.  This is useful to allow all protocols except ones known to
not support a feature used in the test (e.g. .bdrv_create).

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
2b12baf0e3 qemu-iotests: Test 137 only supports 'file' protocol
Since test 137 make uses of qcow2.py, only local files are supported.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Peter Maydell
fe8ee082db Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-22' into staging
QAPI patches for 2017-02-22

# gpg: Signature made Wed 22 Feb 2017 19:12:27 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-22:
  block: Don't bother asserting type of output visitor's output
  monitor: Clean up handle_hmp_command() a bit
  tests: Don't check qobject_type() before qobject_to_qbool()
  tests: Don't check qobject_type() before qobject_to_qfloat()
  tests: Don't check qobject_type() before qobject_to_qint()
  tests: Don't check qobject_type() before qobject_to_qstring()
  tests: Don't check qobject_type() before qobject_to_qlist()
  Don't check qobject_type() before qobject_to_qdict()
  test-qmp-event: Simplify and tighten event_test_emit()
  libqtest: Clean up qmp_response() a bit
  check-qjson: Simplify around compare_litqobj_to_qobj()
  check-qdict: Tighten qdict_crumple_test_recursive() some
  check-qdict: Simplify qdict_crumple_test_recursive()
  qdict: Make qdict_get_qlist() safe like qdict_get_qdict()
  net: Flatten simple union NetLegacyOptions
  numa: Flatten simple union NumaOptions

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 15:00:51 +00:00
Peter Maydell
63f495beb4 Merge remote-tracking branch 'remotes/kraxel/tags/pull-cve-2017-2620-20170224-1' into staging
cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)

# gpg: Signature made Fri 24 Feb 2017 13:42:39 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-cve-2017-2620-20170224-1:
  cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 13:55:26 +00:00
Gerd Hoffmann
92f2b88cea cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)
CIRRUS_BLTMODE_MEMSYSSRC blits do NOT check blit destination
and blit width, at all.  Oops.  Fix it.

Security impact: high.

The missing blit destination check allows to write to host memory.
Basically same as CVE-2014-8106 for the other blit variants.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-24 14:35:50 +01:00
Peter Maydell
5842b55fd4 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170223-1' into staging
usb: ohci bugfix, switch core to unrealize, xhci property cleanup

# gpg: Signature made Thu 23 Feb 2017 15:37:57 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170223-1:
  xhci: properties cleanup
  usb: ohci: fix error return code in servicing td
  usb: replace handle_destroy with unrealize

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 12:49:04 +00:00
Paul Burton
df1d8a1f29 hw/mips: MIPS Boston board support
Introduce support for emulating the MIPS Boston development board. The
Boston board is built around an FPGA & 3 PCIe controllers, one of which
is connected to an Intel EG20T Platform Controller Hub. It is used
during the development & debug of new CPUs and the software intended to
run on them, and is essentially the successor to the older MIPS Malta
board.

This patch does not implement the EG20T, instead connecting an already
supported ICH-9 AHCI controller. Whilst this isn't accurate it's enough
for typical stock Boston software (eg. Linux kernels) to work with hard
disks given that both the ICH-9 & EG20T implement the AHCI
specification.

Boston boards typically boot kernels in the FIT image format, and this
patch will treat kernels provided to QEMU as such. When loading a kernel
directly, the board code will generate minimal firmware much as the
Malta board code does. This firmware will set up the CM, CPC & GIC
register base addresses then set argument registers & jump to the kernel
entry point. Alternatively, bootloader code may be loaded using the bios
argument in which case no firmware will be generated & execution will
proceed from the start of the boot code at the default MIPS boot
exception vector (offset 0x1fc00000 into (c)kseg1).

Currently real Boston boards are always used with FPGA bitfiles that
include a Global Interrupt Controller (GIC), so the interrupt
configuration is only defined for such cases. Therefore the board will
only allow use of CPUs which implement the CPS components, including the
GIC, and will otherwise exit with a message.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
[yongbok.kim@imgtec.com:
  isolated boston machine support for mips64el.
  updated for recent Chardev changes.
  ignore missing bios/kernel for qtest.
  added default -drive to if=ide explicitly.
  changed default memory size into 1G due to make check failure
  on 32-bit hosts]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-24 10:37:21 +00:00
Alex Bennée
ca759f9e38 tcg: enable MTTCG by default for ARM on x86 hosts
This enables the multi-threaded system emulation by default for ARMv7
and ARMv8 guests using the x86_64 TCG backend. This is because on the
guest side:

  - The ARM translate.c/translate-64.c have been converted to
    - use MTTCG safe atomic primitives
    - emit the appropriate barrier ops
  - The ARM machine has been updated to
    - hold the BQL when modifying shared cross-vCPU state
    - defer powerctl changes to async safe work

All the host backends support the barrier and atomic primitives but
need to provide same-or-better support for normal load/store
operations.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Acked-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:46 +00:00
Alex Bennée
4881658a4b hw/misc/imx6_src: defer clearing of SRC_SCR reset bits
The arm_reset_cpu/set_cpu_on/set_cpu_off() functions do their work
asynchronously in the target vCPUs context. As a result we need to
ensure the SRC_SCR reset bits correctly report the reset status at the
right time. To do this we defer the clearing of the bit with an async
job which will run after the work queued by ARM powerctl functions.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
a67cf27727 target-arm: ensure all cross vCPUs TLB flushes complete
Previously flushes on other vCPUs would only get serviced when they
exited their TranslationBlocks. While this isn't overly problematic it
violates the semantics of TLB flush from the point of view of source
vCPU.

To solve this we call the cputlb *_all_cpus_synced() functions to do
the flushes which ensures all flushes are completed by the time the
vCPU next schedules its own work. As the TLB instructions are modelled
as CP writes the TB ends at this point meaning cpu->exit_request will
be checked before the next instruction is executed.

Deferring the work until the architectural sync point is a possible
future optimisation.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
c22edfebff target-arm: don't generate WFE/YIELD calls for MTTCG
The WFE and YIELD instructions are really only hints and in TCG's case
they were useful to move the scheduling on from one vCPU to the next. In
the parallel context (MTTCG) this just causes an unnecessary cpu_exit
and contention of the BQL.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
062ba099e0 target-arm/powerctl: defer cpu reset work to CPU context
When switching a new vCPU on we want to complete a bunch of the setup
work before we start scheduling the vCPU thread. To do this cleanly we
defer vCPU setup to async work which will run the vCPUs execution
context as the thread is woken up. The scheduling of the work will kick
the vCPU awake.

This avoids potential races in MTTCG system emulation.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
c3b9a07a33 cputlb: introduce tlb_flush_*_all_cpus[_synced]
This introduces support to the cputlb API for flushing all CPUs TLBs
with one call. This avoids the need for target helpers to iterate
through the vCPUs themselves.

An additional variant of the API (_synced) will cause the source vCPUs
work to be scheduled as "safe work". The result will be all the flush
operations will be complete by the time the originating vCPU executes
its safe work. The calling implementation can either end the TB
straight away (which will then pick up the cpu->exit_request on
entering the next block) or defer the exit until the architectural
sync point (usually a barrier instruction).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
b0706b7167 cputlb: atomically update tlb fields used by tlb_reset_dirty
The main use case for tlb_reset_dirty is to set the TLB_NOTDIRTY flags
in TLB entries to force the slow-path on writes. This is used to mark
page ranges containing code which has been translated so it can be
invalidated if written to. To do this safely we need to ensure the TLB
entries in question for all vCPUs are updated before we attempt to run
the code otherwise a race could be introduced.

To achieve this we atomically set the flag in tlb_reset_dirty_range and
take care when setting it when the TLB entry is filled.

On 32 bit systems attempting to emulate 64 bit guests we don't even
bother as we might not have the atomic primitives available. MTTCG is
disabled in this case and can't be forced on. The copy_tlb_helper
function helps keep the atomic semantics in one place to avoid
confusion.

The dirty helper function is made static as it isn't used outside of
cputlb.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
e72184455c cputlb: add tlb_flush_by_mmuidx async routines
This converts the remaining TLB flush routines to use async work when
detecting a cross-vCPU flush. The only minor complication is having to
serialise the var_list of MMU indexes into a form that can be punted
to an asynchronous job.

The pending_tlb_flush field on QOM's CPU structure also becomes a
bitfield rather than a boolean.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
0336cbf853 cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap
While the vargs approach was flexible the original MTTCG ended up
having munge the bits to a bitmap so the data could be used in
deferred work helpers. Instead of hiding that in cputlb we push the
change to the API to make it take a bitmap of MMU indexes instead.

For ARM some the resulting flushes end up being quite long so to aid
readability I've tended to move the index shifting to a new line so
all the bits being or-ed together line up nicely, for example:

    tlb_flush_page_by_mmuidx(other_cs, pageaddr,
                             (1 << ARMMMUIdx_S1SE1) |
                             (1 << ARMMMUIdx_S1SE0));

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
[AT: SPARC parts only]
Reviewed-by: Artyom Tarasenko <atar4qemu@gmail.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
[PM: ARM parts only]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
KONRAD Frederic
e3b9ca8109 cputlb: introduce tlb_flush_* async work.
Some architectures allow to flush the tlb of other VCPUs. This is not a problem
when we have only one thread for all VCPUs but it definitely needs to be an
asynchronous work when we are in true multithreaded work.

We take the tb_lock() when doing this to avoid racing with other threads
which may be invalidating TB's at the same time. The alternative would
be to use proper atomic primitives to clear the tlb entries en-mass.

This patch doesn't do anything to protect other cputlb function being
called in MTTCG mode making cross vCPU changes.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[AJB: remove need for g_malloc on defer, make check fixes, tb_lock]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
857baec1d9 cputlb: tweak qemu_ram_addr_from_host_nofail reporting
This moves the helper function closer to where it is called and updates
the error message to report via error_report instead of the deprecated
fprintf.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
f0aff0f124 cputlb: add assert_cpu_is_self checks
For SoftMMU the TLB flushes are an example of a task that can be
triggered on one vCPU by another. To deal with this properly we need to
use safe work to ensure these changes are done safely. The new assert
can be enabled while debugging to catch these cases.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Pranith Kumar
08e73c48b0 tcg: handle EXCP_ATOMIC exception for system emulation
The patch enables handling atomic code in the guest. This should be
preferably done in cpu_handle_exception(), but the current assumptions
regarding when we can execute atomic sections cause a deadlock.

The current mechanism discards the flags which were set in atomic
execution. We ensure they are properly saved by calling the
cc->cpu_exec_enter/leave() functions around the loop.

As we are running cpu_exec_step_atomic() from the outermost loop we
need to avoid an abort() when single stepping over atomic code since
debug exception longjmp will point to the the setlongjmp in
cpu_exec(). We do this by setting a new jmp_env so that it jumps back
here on an exception.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
[AJB: tweak title, merge with new patches, add mmap_lock]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
CC: Paolo Bonzini <pbonzini@redhat.com>
2017-02-24 10:32:45 +00:00
Alex Bennée
372579427a tcg: enable thread-per-vCPU
There are a couple of changes that occur at the same time here:

  - introduce a single vCPU qemu_tcg_cpu_thread_fn

  One of these is spawned per vCPU with its own Thread and Condition
  variables. qemu_tcg_rr_cpu_thread_fn is the new name for the old
  single threaded function.

  - the TLS current_cpu variable is now live for the lifetime of MTTCG
    vCPU threads. This is for future work where async jobs need to know
    the vCPU context they are operating in.

The user to switch on multi-thread behaviour and spawn a thread
per-vCPU. For a simple test kvm-unit-test like:

  ./arm/run ./arm/locking-test.flat -smp 4 -accel tcg,thread=multi

Will now use 4 vCPU threads and have an expected FAIL (instead of the
unexpected PASS) as the default mode of the test has no protection when
incrementing a shared variable.

We enable the parallel_cpus flag to ensure we generate correct barrier
and atomic code if supported by the front and backends. This doesn't
automatically enable MTTCG until default_mttcg_enabled() is updated to
check the configuration is supported.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[AJB: Some fixes, conditionally, commit rewording]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
2f16960660 tcg: enable tb_lock() for SoftMMU
tb_lock() has long been used for linux-user mode to protect code
generation. By enabling it now we prepare for MTTCG and ensure all code
generation is serialised by this lock. The other major structure that
needs protecting is the l1_map and its PageDesc structures. For the
SoftMMU case we also use tb_lock() to protect these structures instead
of linux-user mmap_lock() which as the name suggests serialises updates
to the structure as a result of guest mmap operations.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
e5143e30fb tcg: remove global exit_request
There are now only two uses of the global exit_request left.

The first ensures we exit the run_loop when we first start to process
pending work and in the kick handler. This is just as easily done by
setting the first_cpu->exit_request flag.

The second use is in the round robin kick routine. The global
exit_request ensured every vCPU would set its local exit_request and
cause a full exit of the loop. Now the iothread isn't being held while
running we can just rely on the kick handler to push us out as intended.

We lightly re-factor the main vCPU thread to ensure cpu->exit_requests
cause us to exit the main loop and process any IO requests that might
come along. As an cpu->exit_request may legitimately get squashed
while processing the EXCP_INTERRUPT exception we also check
cpu->queued_work_first to ensure queued work is expedited as soon as
possible.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Jan Kiszka
8d04fb55de tcg: drop global lock during TCG code execution
This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.

We have to revert a few optimization for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.

Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here.
These numbers demonstrate where we gain something:

20338 jan       20   0  331m  75m 6904 R   99  0.9   0:50.95 qemu-system-arm
20337 jan       20   0  331m  75m 6904 S   20  0.9   0:26.50 qemu-system-arm

The guest CPU was fully loaded, but the iothread could still run mostly
independent on a second core. Without the patch we don't get beyond

32206 jan       20   0  330m  73m 7036 R   82  0.9   1:06.00 qemu-system-arm
32204 jan       20   0  330m  73m 7036 S   21  0.9   0:17.03 qemu-system-arm

We don't benefit significantly, though, when the guest is not fully
loading a host CPU.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com>
[FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex]
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[EGC: fixed iothread lock for cpu-exec IRQ handling]
Signed-off-by: Emilio G. Cota <cota@braap.org>
[AJB: -smp single-threaded fix, clean commit msg, BQL fixes]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
[PM: target-arm changes]
Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:45 +00:00
Alex Bennée
791158d93b tcg: rename tcg_current_cpu to tcg_current_rr_cpu
..and make the definition local to cpus. In preparation for MTTCG the
concept of a global tcg_current_cpu will no longer make sense. However
we still need to keep track of it in the single-threaded case to be able
to exit quickly when required.

qemu_cpu_kick_no_halt() moves and becomes qemu_cpu_kick_rr_cpu() to
emphasise its use-case. qemu_cpu_kick now kicks the relevant cpu as
well as qemu_kick_rr_cpu() which will become a no-op in MTTCG.

For the time being the setting of the global exit_request remains.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:45 +00:00
Alex Bennée
6546706d28 tcg: add kick timer for single-threaded vCPU emulation
Currently we rely on the side effect of the main loop grabbing the
iothread_mutex to give any long running basic block chains a kick to
ensure the next vCPU is scheduled. As this code is being re-factored and
rationalised we now do it explicitly here.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:45 +00:00
KONRAD Frederic
8d4e9146b3 tcg: add options for enabling MTTCG
We know there will be cases where MTTCG won't work until additional work
is done in the front/back ends to support. It will however be useful to
be able to turn it on.

As a result MTTCG will default to off unless the combination is
supported. However the user can turn it on for the sake of testing.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[AJB: move to -accel tcg,thread=multi|single, defaults]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
2093714314 tcg: move TCG_MO/BAR types into own file
We'll be using the memory ordering definitions to define values for
both the host and guest. To avoid fighting with circular header
dependencies just move these types into their own minimal header.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Pranith Kumar
4ec667042d mttcg: Add missing tb_lock/unlock() in cpu_exec_step()
The recent patch enabling lock assertions uncovered the missing lock
acquisition in cpu_exec_step(). This patch adds them.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Pranith Kumar
6ac3d7e845 mttcg: translate-all: Enable locking debug in a debug build
Enable tcg lock debug asserts in a debug build by default instead of
relying on DEBUG_LOCKING. None of the other DEBUG_* macros have
asserts, so this patch removes DEBUG_LOCKING and enable these asserts
in a debug build.

CC: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
[AJB: tweak ifdefs so can be early in series]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
c6489dd921 docs: new design document multi-thread-tcg.txt
This documents the current design for upgrading TCG emulation to take
advantage of modern CPUs by running a thread-per-CPU. The document goes
through the various areas of the code affected by such a change and
proposes design requirements for each part of the solution.

The text marked with (Current solution[s]) to document what the current
approaches being used are.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Peter Maydell
5522924718 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170222' into staging
ppc patch queue for 2017-02-22

This pull request has:
   * Yet more POWER9 instruction implementations
   * Some extensions to the softfloat code which are necesssary for
     some of those instructions
   * Some preliminary patches in preparation for POWER9 softmmu
     implementation
   * Igor Mammedov's cleanups to unify hotplug cpu handling across
     architectures
   * Assorted bugfixes

The softfloat and cpu hotplug changes aren't entirely ppc specific (in
fact the hotplug stuff contains some pc specific patches).  However
they're included here because ppc is one of the main beneficiaries,
and the series depend on some ppc specific patches.

# gpg: Signature made Wed 22 Feb 2017 06:29:47 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170222: (43 commits)
  hw/ppc/ppc405_uc.c: Avoid integer overflows
  hw/ppc/spapr: Check for valid page size when hot plugging memory
  target-ppc: fix Book-E TLB matching
  hw/net/spapr_llan: 6 byte mac address device tree entry
  machine: replace query_hotpluggable_cpus() callback with has_hotpluggable_cpus flag
  machine: unify [pc_|spapr_]query_hotpluggable_cpus() callbacks
  spapr: reuse machine->possible_cpus instead of cores[]
  change CPUArchId.cpu type to Object*
  pc: pass apic_id to pc_find_cpu_slot() directly so lookup could be done without CPU object
  pc: calculate topology only once when possible_cpus is initialised
  pc: move pcms->possible_cpus init out of pc_cpus_init()
  machine: move possible_cpus to MachineState
  hw/pci-host/prep: Do not use hw_error() in realize function
  target/ppc/POWER9: Direct all instr and data storage interrupts to the hypv
  target/ppc/POWER9: Adapt LPCR handling for POWER9
  target/ppc/POWER9: Add ISAv3.00 MMU definition
  target/ppc: Fix LPCR DPFD mask define
  target-ppc: Add xscvqpudz and xscvqpuwz instructions
  target-ppc: Implement round to odd variants of quad FP instructions
  softfloat: Add float128_to_uint32_round_to_zero()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:13:57 +00:00
Dong Jia Shi
9f94f84ce7 s390x/css: handle format-0 TIC CCW correctly
For TIC CCW, bit positions 8-32 of the format-1 CCW must contain zeros;
otherwise, a program-check condition is generated. For format-0 TIC CCWs,
bits 32-63 are ignored.

To convert TIC from format-0 CCW to format-1 CCW correctly, let's clear
bits 8-32 to guarantee compatibility.

Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
f738f296ea s390x/arch_dump: pass cpuid into notes sections
we need to pass the cpuid into the pid field of the notes
section, otherwise the notes for different CPUs all have 0:

e.g. objdump -h shows:
old:
  5 .reg-s390-prefix/0 00000004  0000000000000000  0000000000000000
  6 .reg-s390-prefix 00000004  0000000000000000  0000000000000000
 21 .reg-s390-prefix/0 00000004  0000000000000000  0000000000000000
new:
  5 .reg-s390-prefix/1 00000004  0000000000000000  0000000000000000
  6 .reg-s390-prefix 00000004  0000000000000000  0000000000000000
 21 .reg-s390-prefix/2 00000004  0000000000000000  0000000000000000

Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
5f706fdc16 s390x/arch_dump: use proper note name and note size
In binutils/libbfd (bfd/elf.c) it is enforced that all s390
specific ELF notes like e.g. NT_S390_PREFIX or NT_S390_CTRS
have "LINUX" specified as note name and that the namesz is
6. Otherwise the notes are ignored.

QEMU currently uses "CORE" for these notes. Up to now this has
not been a real problem because the dump analysis tool "crash"
does handle that. But it will break all programs that use libbfd
for processing ELF notes.

So fix this and use "LINUX" for all s390 specific notes to comply
with libbfd. Also set the correct namesz.

Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
b1914b824a virtio-ccw: support VIRTIO_QUEUE_MAX virtqueues
The maximal number of virtqueues per device can be limited on a per
transport basis. For virtio-ccw this limit is defined by
VIRTIO_CCW_QUEUE_MAX, however the limitation used to come form the
number of adapter routes supported by flic (via notifiers).

Recently the limitation of the flic was adjusted so that it can
accommodate VIRTIO_QUEUE_MAX queues, and is in the meanwhile checked for
separately too.

Let us remove the transport specific limitation of virtio-ccw by
dropping VIRTIO_CCW_QUEUE_MAX and using VIRTIO_QUEUE_MAX instead.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
069097dad3 s390x: bump ADAPTER_ROUTES_MAX_GSI
Let's increase ADAPTER_ROUTES_MAX_GSI to VIRTIO_QUEUE_MAX which is the
largest demand foreseeable at the moment. Let us add a compatibility
macro for the previous machines so client code can maintain backwards
migration compatibility

To not mess up migration compatibility for virtio-ccw
VIRTIO_CCW_QUEUE_MAX is left at it's current value, and will be dropped
when virtio-ccw is converted to use the capability of the flic
introduced by this patch.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
0708afa704 virtio-ccw: check flic->adapter_routes_max_batch
Currently VIRTIO_CCW_QUEUE_MAX is defined as ADAPTER_ROUTES_MAX_GSI.
That is when checking queue max we implicitly check the constraint
concerning the number of adapter routes. This won't be satisfactory any
more (due to backward migration considerations) if ADAPTER_ROUTES_MAX_GSI
changes (ADAPTER_ROUTES_MAX_GSI is going to change because we want to
support up to VIRTIO_QUEUE_MAX queues per virtio-ccw device).

Let us introduce a check on a recently introduce flic property which
gives us the compatibility machine aware limit on adapter routes.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
e61cc6b5c6 s390x: add property adapter_routes_max_batch
To make virtio-ccw supports more that  64 virtqueues we will have to
increase ADAPTER_ROUTES_MAX_GSI which is currently limiting the number if
possible adapter routes. Of course increasing the number of supported
routes can break backwards migration.

Let us introduce a compatibility property adapter_routes_max_batch so
client code can use the some old limit if in compatibility mode and
retain the migration compatibility.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
797b608638 virtio-ccw: Check the number of vqs in CCW_CMD_SET_IND
We cannot support more than 64 virtqueues with the 64 bits provided by
classic indicators. If a driver tries to setup classic indicators
(which it is free to do even for virtio-1 devices) for a device with
more than 64 virtqueues, we should reject the attempt so that the
driver does not end up with an unusable device.

This is in preparation for bumping the number of supported virtqueues
on the ccw transport.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
d2256070d2 virtio-ccw: add virtio-crypto-ccw device
Wire up virtio-crypto for the CCW based VIRTIO.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
47e13dfd86 virtio-ccw: handle virtio 1 only devices
As a preparation for wiring-up virtio-crypto, the first non-transitional
virtio device on the ccw transport, let us introduce a mechanism for
disabling revision 0.  This is more or less equivalent with disabling
legacy as revision 0 is legacy only, and legacy drivers use the revision
0 exclusively.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Cornelia Huck
ba690c7171 s390x/flic: fail migration on source already
Current code puts a 'FLIC_FAILED' marker into the migration stream
to indicate something went wrong while saving flic state and fails
load if it encounters that marker. VMState's put routine recently
gained the ability to return error codes (but did not wire it up
yet).

In order to be able to reap the benefits of returning an error and
failing migration on the source already once this gets wired up
in core, return an error in addition to storing 'FLIC_FAILED'.

Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
409422cd83 s390x/kvm: detect some program check loops
Sometimes (e.g. early boot) a guest is broken in such ways that it loops
100% delivering operation exceptions (illegal operation) but the pgm new
PSW is not set properly. This will result in code being read from
address zero, which usually contains another illegal op. Let's detect
this case and put the guest in crashed state. Instead of only detecting
this for address zero apply a heuristic that will work for any program
check new psw so that it will also reach the crashed state if you
provide some random elf file to the -kernel option.
We do not want guest problem state to be able to trigger a guest panic,
e.g. by faulting on an address that is the same as the program check
new PSW, so we check for the problem state bit being off.

With this we
a: get rid of CPU consumption of such broken guests
b: keep the program old PSW. This allows to find out the original illegal
   operation - making debugging such early boot issues much easier than
   with single stepping

This relies on the kernel using a similar heuristic and passing such
operation exceptions to user space.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Cornelia Huck
94b5024b1f s390x/s390-virtio: get rid of DPRINTF
The DPRINTF approach is likely to introduce bitrot, and the preferred
way for debugging is tracing anyway. Fortunately, there are no users
(left), so nuke it.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-02-24 10:15:18 +01:00
Fam Zheng
a8f159d45b docker: Install python2 explicitly in docker image
Python is no longer installed implicitly, but the QEMU build system
requires it. List it in PACKAGES.

Reported-by: Auger Eric <eric.auger@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170222021801.28658-1-famz@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
e70dc7f854 MAINTAINERS: merge Build and test automation with Docker tests
The docker framework is really just another piece in the build
automation puzzle so lets merge it together. For added bonus I've also
included the Travis and Patchew status links. The Shippable links will
be added later once mainline tests have been configured and setup.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170220105139.21581-5-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
d92d886a3b .shippable.yml: new CI provider
Ostensibly Shippable offers a similar set of services as Travis.
However they are focused on Docker container based work-flows so we
can use our existing containers to run a few extra builds - in this
case a bunch of cross-compiled targets on a Debian multiarch system.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170220105139.21581-4-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
24e0131f37 new: debian docker targets for cross-compiling
This provides a basic Debian install with access to the emdebian cross
compilers. The debian-armhf-cross and debian-arm64-cross targets build
on the basic Debian image to allow cross compiling to those targets.

A new environment variable (QEMU_CONFIGURE_OPTS) is set as part of the
docker container and passed to the build to specify the
--cross-prefix. The user still calls the build in the usual way, for
example:

  make docker-test-build@debian-arm64-cross \
    TARGET_LIST="aarch64-softmmu,aarch64-linux-user"

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170220105139.21581-3-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
414a8ce57e tests/docker: add basic user mapping support
Currently all docker builds are done by exporting a tarball to the
docker container and running the build as the containers root user.
Other use cases are possible however and it is possible to map a part
of users file-system to the container. This is useful for example for
doing cross-builds of arbitrary source trees. For this to work
smoothly the container needs to have a user created that maps cleanly
to the host system.

This adds a -u option to the docker script so that:

  DEB_ARCH=armhf DEB_TYPE=stable ./tests/docker/docker.py build \
    -u --include-executable=arm-linux-user/qemu-arm \
    debian:armhf ./tests/docker/dockerfiles/debian-bootstrap.docker

Will build a container that can then be run like:

  docker run --rm -it -v /home/alex/lsrc/qemu/risu.git/:/src \
    --user=alex:alex -w /src/ debian:armhf \
    sh -c "make clean && ./configure -s && make"

All docker containers built will add the current user unless
explicitly disabled by specifying NOUSER when invoking the Makefile:

  make docker-image-debian-armhf-cross NOUSER=1

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170220105139.21581-2-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Markus Armbruster
75cdcd1553 option: Fix checking of sizes for overflow and trailing crap
parse_option_size()'s checking for overflow and trailing crap is
wrong.  Has always been that way.  qemu_strtosz() gets it right, so
use that.

This adds support for size suffixes 'P', 'E', and ignores case for all
suffixes, not just 'k'.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-25-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
f46bfdbfc8 util/cutils: Change qemu_strtosz*() from int64_t to uint64_t
This will permit its use in parse_option_size().

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-24-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
f17fd4fdf0 util/cutils: Return qemu_strtosz*() error and value separately
This makes qemu_strtosz(), qemu_strtosz_mebi() and
qemu_strtosz_metric() similar to qemu_strtoi64(), except negative
values are rejected.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-23-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
4fcdf65ae2 util/cutils: Let qemu_strtosz*() optionally reject trailing crap
Change the qemu_strtosz() & friends to return -EINVAL when @endptr is
null and the conversion doesn't consume the string completely.
Matches how qemu_strtol() & friends work.

Only test_qemu_strtosz_simple() passes a null @endptr.  No functional
change there, because its conversion consumes the string.

Simplify callers that use @endptr only to fail when it doesn't point
to '\0' to pass a null @endptr instead.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-22-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
606caa0a2a qemu-img: Wrap cvtnum() around qemu_strtosz()
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-21-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
dab9cc9237 test-cutils: Drop suffix from test_qemu_strtosz_simple()
Leave testing unit suffixes to test_qemu_strtosz_units().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-20-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
753f8da0e0 test-cutils: Use qemu_strtosz() more often
Use qemu_strtosz() instead of qemu_strtosz_MiB() where it doesn't
really make a difference.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-19-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
17f942560e util/cutils: Drop QEMU_STRTOSZ_DEFSUFFIX_* macros
Writing QEMU_STRTOSZ_DEFSUFFIX_* instead of '*' gains nothing.  Get
rid of these eyesores.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-18-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
466dea14e6 util/cutils: New qemu_strtosz()
Most callers of qemu_strtosz_suffix() pass QEMU_STRTOSZ_DEFSUFFIX_B.
Capture the pattern in new qemu_strtosz().

Inline qemu_strtosz_suffix() into its only remaining caller.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-17-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
e591591b32 util/cutils: Rename qemu_strtosz() to qemu_strtosz_MiB()
With qemu_strtosz(), no suffix means mebibytes.  It's used rarely.
I'm going to add a similar function where no suffix means bytes.
Rename qemu_strtosz() to qemu_strtosz_MiB() to make the name
qemu_strtosz() available for the new function.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-16-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
d2734d2629 util/cutils: New qemu_strtosz_metric()
To parse numbers with metric suffixes, we use

    qemu_strtosz_suffix_unit(nptr, &eptr, QEMU_STRTOSZ_DEFSUFFIX_B, 1000)

Capture this in a new function for legibility:

    qemu_strtosz_metric(nptr, &eptr)

Replace test_qemu_strtosz_suffix_unit() by test_qemu_strtosz_metric().

Rename qemu_strtosz_suffix_unit() to do_strtosz() and give it internal
linkage.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-15-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
0b742797aa test-cutils: Cover qemu_strtosz() around range limits
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-14-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
a6b4373fa2 test-cutils: Cover qemu_strtosz() with trailing crap
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-13-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
18aec47967 test-cutils: Cover qemu_strtosz() invalid input
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-12-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
019144b286 test-cutils: Add missing qemu_strtosz()... endptr checks
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-11-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
3403e5eb88 option: Fix to reject invalid and overflowing numbers
parse_option_number() fails to check for these errors after
strtoull().  Has always been broken.  Fix that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-10-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
4baef2679e util/cutils: Clean up control flow around qemu_strtol() a bit
Reorder check_strtox_error() to make it obvious that we always store
through a non-null @endptr.

Transform

    if (some error) {
        error case ...
        err = value for error case;
    } else {
        normal case ...
        err = value for normal case;
    }
    return err;

to

    if (some error) {
        error case ...
        return value for error case;
    }
    normal case ...
    return value for normal case;

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-9-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
717adf9609 util/cutils: Clean up variable names around qemu_strtol()
Name same things the same, different things differently.

* qemu_strtol()'s parameter @nptr is called @p in
  check_strtox_error().  Rename the latter.

* qemu_strtol()'s parameter @endptr is called @next in
  check_strtox_error().  Rename the latter.

* qemu_strtol()'s variable @p is called @endptr in
  check_strtox_error().  Rename both to @ep.

* qemu_strtol()'s variable @err is *negative* errno,
  check_strtox_error()'s parameter @err is *positive*.  Rename the
  latter to @libc_errno.

Same for qemu_strtoul(), qemu_strtoi64(), qemu_strtou64(), of course.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-8-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
b30d188677 util/cutils: Rename qemu_strtoll(), qemu_strtoull()
The name qemu_strtoll() suggests conversion to long long, but it
actually converts to int64_t.  Rename to qemu_strtoi64().

The name qemu_strtoull() suggests conversion to unsigned long long,
but it actually converts to uint64_t.  Rename to qemu_strtou64().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-7-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
4295f879be util/cutils: Rewrite documentation of qemu_strtol() & friends
Fixes the following documentation bugs:

* Fails to document that null @nptr is safe.

* Fails to document that we return -EINVAL when no conversion could be
  performed (commit 47d4be1).

* Confuses long long with int64_t, and unsigned long long with
  uint64_t.

* Claims the unsigned conversions can underflow.  They can't.

While there, mark problematic assumptions that int64_t is long long,
and uint64_t is unsigned long long with FIXME comments.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-6-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
bc7c08a2c3 test-cutils: Clean up qemu_strtoul() result checks
Use unsigned comparisons to check the result of qemu_strtoul() and
strtoull().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-5-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
73245450b3 test-cutils: Add missing qemu_strtol()... endptr checks
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-4-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
8ee8409eff option: Assert value string isn't null
Plenty of code relies on QemuOpt member @str not being null, including
qemu_opts_print(), qemu_opts_to_qdict(), and callbacks passed to
qemu_opt_foreach().

Begs the question whether it can be null.  Only opt_set() creates
QemuOpt.  It sets member @str to its argument @value.  Passing null
for @value would plant a time bomb.  Callers:

* opts_do_parse() can't pass null.

* qemu_opt_set() passes its argument @value.  Callers:

  - qemu_opts_from_qdict_1() can't pass null

  - qemu_opts_set() passes its argument @value, but none of its
    callers pass null.

  - Many more outside qemu-option.c, but they shouldn't pass null,
    either.

Assert member @str isn't null, so that misuse is caught right away.

Simplify parse_option_bool(), parse_option_number() and
parse_option_size() accordingly.  Best viewed with whitespace changes
ignored.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-3-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
694baf57ae test-qemu-opts: Cover qemu_opts_parse()
The new tests demonstrate a few bugs, all clearly marked.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-2-git-send-email-armbru@redhat.com>
[A few additional test cases squashed in, see
Message-ID: <871supjijq.fsf@dusky.pond.sub.org>]
2017-02-23 20:34:24 +01:00
Peter Maydell
2d896b454a Revert "hw/mips: MIPS Boston board support"
This reverts commit d3473e147a.

This commit creates a board which defaults to having 2GB of RAM.
Unfortunately on 32-bit hosts we can't create boards with 2GB of RAM,
and so 'make check' fails. I missed this during testing of the
merge, unfortunately. Luckily the offending commit is the last
one in the merge request, so we can just revert it for now.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 18:04:45 +00:00
Gerd Hoffmann
4f72b8d2a6 xhci: properties cleanup
Split xhci properties into common and nec specific.

Move the backward compat flags to nec, so the new qemu-xhci
devices doesn't carry on the compatibiity stuff.

Move the msi/msix switches too and just enable msix for qemu-xhci.

Also move the intrs and slots properties.  Wasn't a great idea to
make them configurable in the first place, nobody needs this.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1487663432-10410-1-git-send-email-kraxel@redhat.com
2017-02-23 16:18:03 +01:00
Li Qiang
6ebc069d67 usb: ohci: fix error return code in servicing td
It should return 1 if an error occurs when reading td.
This will avoid an infinite loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1487760990-115925-1-git-send-email-liqiang6-s@360.cn
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-23 16:18:03 +01:00
Marc-André Lureau
c4fe9700e6 usb: replace handle_destroy with unrealize
Curiously, unrealize() is not being used, but it seems more
appropriate than handle_destroy() together with realize(). It is more
ubiquitous destroy name in qemu code base and may throw errors.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170221141451.28305-25-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-23 15:40:19 +01:00
Peter Maydell
10f25e4844 Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170222' into staging
MIPS patches 2017-02-22

Changes:
* Add MIPS Boston board support

# gpg: Signature made Wed 22 Feb 2017 00:08:00 GMT
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170222:
  hw/mips: MIPS Boston board support
  hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
  loader: Support Flattened Image Trees (FIT images)
  dtc: Update requirement to v1.4.2
  target-mips: Provide function to test if a CPU supports an ISA
  hw/mips_gic: Update pin state on mask changes
  hw/mips_gictimer: provide API for retrieving frequency
  hw/mips_cmgcr: allow GCR base to be moved

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 09:59:40 +00:00
XiongZhang
c2b2e158cc vfio/pci-quirks.c: Disable stolen memory for igd VFIO
Regardless of running in UPT or legacy mode, the guest igd
drivers may attempt to use stolen memory, however only legacy
mode has BIOS support for reserving stolen memmory in the
guest VM. We zero out the stolen memory size in all cases,
then guest igd driver won't use stolen memory.
In legacy mode, user could use x-igd-gms option to specify the
amount of stolen memory which will be pre-allocated and reserved
by bios for igd use.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99028
          https://bugs.freedesktop.org/show_bug.cgi?id=99025

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-22 13:19:59 -07:00
Alex Williamson
d0d1cd70d1 vfio/pci: Improve extended capability comments, skip masked caps
Since commit 4bb571d857 ("pci/pcie: don't assume cap id 0 is
reserved") removes the internal use of extended capability ID 0, the
comment here becomes invalid.  However, peeling back the onion, the
code is still correct and we still can't seed the capability chain
with ID 0, unless we want to muck with using the version number to
force the header to be non-zero, which is much uglier to deal with.
The comment also now covers some of the subtleties of using cap ID 0,
such as transparently indicating absence of capabilities if none are
added.  This doesn't detract from the correctness of the referenced
commit as vfio in the kernel also uses capability ID zero to mask
capabilties.  In fact, we should skip zero capabilities precisely
because the kernel might also expose such a capability at the head
position and re-introduce the problem.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>
Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Tested-by: Jintack Lim <jintack@cs.columbia.edu>
2017-02-22 13:19:58 -07:00
Alex Williamson
35c7cb4caf vfio/pci: Report errors from qdev_unplug() via device request
Currently we ignore this error, report it with error_reportf_err()

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-02-22 13:19:58 -07:00
Markus Armbruster
7c81e4e9db block: Don't bother asserting type of output visitor's output
After a visit of a complex QAPI type FOO

    ov = qobject_output_visitor_new(&foo);
    visit_type_FOO(ov, NULL, expr, &error_abort);
    visit_complete(ov, &foo);

we can safely assume qobject_type(foo) is QTYPE_QDICT.  We do in many
places, but occasionally assert qobject_type(obj) == QTYPE_QDICT.
Don't.  The appropriate place to check such fundamental properties of
QAPI visitors is the test suite.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-15-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:20 +01:00
Markus Armbruster
bbf1028a0a monitor: Clean up handle_hmp_command() a bit
Leave checking qobject_type(req) to qmp_check_input_obj().  Rework
handling of json_parser_parse_err() failing without setting an error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-14-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:17 +01:00
Markus Armbruster
dfad9ec4e9 tests: Don't check qobject_type() before qobject_to_qbool()
qobject_to_qbool(obj) returns NULL when obj isn't a QBool.  Check
that instead of qobject_type(obj) == QTYPE_QBOOL.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-13-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:14 +01:00
Markus Armbruster
8978b34af3 tests: Don't check qobject_type() before qobject_to_qfloat()
qobject_to_qfloat(obj) returns NULL when obj isn't a QFloat.  Check
that instead of qobject_type(obj) == QTYPE_QFLOAT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-12-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:11 +01:00
Markus Armbruster
0abfc4b885 tests: Don't check qobject_type() before qobject_to_qint()
qobject_to_qint(obj) returns NULL when obj isn't a QInt.  Check
that instead of qobject_type(obj) == QTYPE_QINT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-11-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:09 +01:00
Markus Armbruster
363e13f86e tests: Don't check qobject_type() before qobject_to_qstring()
qobject_to_qstring(obj) returns NULL when obj isn't a QString.  Check
that instead of qobject_type(obj) == QTYPE_QSTRING.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-10-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:06 +01:00
Markus Armbruster
cd17ba51f5 tests: Don't check qobject_type() before qobject_to_qlist()
qobject_to_qlist(obj) returns NULL when obj isn't a QList.  Check
that instead of qobject_type(obj) == QTYPE_QLIST.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-9-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:04 +01:00
Markus Armbruster
ca6b6e1e68 Don't check qobject_type() before qobject_to_qdict()
qobject_to_qdict(obj) returns NULL when obj isn't a QDict.  Check
that instead of qobject_type(obj) == QTYPE_QDICT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-8-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:01 +01:00
Markus Armbruster
4b32e11a59 test-qmp-event: Simplify and tighten event_test_emit()
Use qdict_get_qdict() and qdict_get_try_int() to simplify.

While there, add a sanity check for seconds.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-7-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:59 +01:00
Markus Armbruster
4d96f329cc libqtest: Clean up qmp_response() a bit
Use qobject_to_qdict() instead of a type cast.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-6-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:56 +01:00
Markus Armbruster
9eaaf97168 check-qjson: Simplify around compare_litqobj_to_qobj()
Make compare_litqobj_to_qobj() cope with null, and drop non-null
assertions from callers.

compare_litqobj_to_qobj() already checks the QType matches; drop the
redundant assertions from callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-5-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:54 +01:00
Markus Armbruster
a68931ea5f check-qdict: Tighten qdict_crumple_test_recursive() some
Consistently check for unexpected QDict entries, and qdict_get_qdict()
success.  The latter doesn't tighten the test, it only makes it fail
more nicely.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:51 +01:00
Markus Armbruster
ff9d38963e check-qdict: Simplify qdict_crumple_test_recursive()
Use qdict_get_qdict(), qdict_get_qlist() instead of qdict_get()
followed by qobject_to_qdict(), qobject_to_qlist().

While there, drop some redundant code.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:48 +01:00
Markus Armbruster
b25f23e7db qdict: Make qdict_get_qlist() safe like qdict_get_qdict()
Commit 89cad9f changed qdict_get_qdict() to return NULL instead of
crash when the key doesn't exist or its value isn't a QDict.
Commit 2d6421a neglected to do the same for qdict_get_qlist().
Correct that, and update the function comments.

qdict_get_obj() is now unused, remove.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-2-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:37 +01:00
Markus Armbruster
d3be4b57ce net: Flatten simple union NetLegacyOptions
Simple unions are simpler than flat unions in the schema, but more
complicated in C and on the QMP wire: there's extra indirection in C
and extra nesting on the wire, both pointless.  They're best avoided
in new code.

NetLegacyOptions isn't new, but it's only used internally, not in QMP.
Convert it to a flat union.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487709988-14322-3-git-send-email-armbru@redhat.com>
2017-02-22 19:50:52 +01:00
Markus Armbruster
d081a49af8 numa: Flatten simple union NumaOptions
Simple unions are simpler than flat unions in the schema, but more
complicated in C and on the QMP wire: there's extra indirection in C
and extra nesting on the wire, both pointless.  They're best avoided
in new code.

NumaOptions isn't new, but it's only used internally, not in QMP.
Convert it to a flat union.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487709988-14322-2-git-send-email-armbru@redhat.com>
2017-02-22 19:50:46 +01:00
Peter Maydell
fb6971c110 hw/ppc/ppc405_uc.c: Avoid integer overflows
When performing clock calculations, the ppc405_uc code
has several places where it multiplies together two
32-bit variables and assigns the result to a 64-bit
variable. This doesn't quite do what is intended because
C will compute a 32-bit multiply result. Add casts to
ensure we don't truncate the result.

(Spotted by Coverity, CID 1005504, 1005505.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Thomas Huth
df58713396 hw/ppc/spapr: Check for valid page size when hot plugging memory
On POWER, the valid page sizes that the guest can use are bound
to the CPU and not to the memory region. QEMU already has some
fancy logic to find out the right maximum memory size to tell
it to the guest during boot (see getrampagesize() in the file
target/ppc/kvm.c for more information).
However, once we're booted and the guest is using huge pages
already, it is currently still possible to hot-plug memory regions
that does not support huge pages - which of course does not work
on POWER, since the guest thinks that it is possible to use huge
pages everywhere. The KVM_RUN ioctl will then abort with -EFAULT,
QEMU spills out a not very helpful error message together with
a register dump and the user is annoyed that the VM unexpectedly
died.
To avoid this situation, we should check the page size of hot-plugged
DIMMs to see whether it is possible to use it in the current VM.
If it does not fit, we can print out a better error message and
refuse to add it, so that the VM does not die unexpectely and the
user has a second chance to plug a DIMM with a matching memory
backend instead.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1419466
Signed-off-by: Thomas Huth <thuth@redhat.com>
[dwg: Fix a build error on 32-bit builds with KVM]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Alex Zuepke
0a4c774086 target-ppc: fix Book-E TLB matching
The Book-E TLB matching process should bail out early when a TLB
entry matches, but the access permissions are wrong. The CPU
will then raise a DSI error instead of a Data TLB error, as
described for TLB matching in Freescale and IBM documents.

Signed-off-by: Alex Zuepke <azu@sysgo.de>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Sam Bobroff
87684b4c40 hw/net/spapr_llan: 6 byte mac address device tree entry
The spapr-vlan device in QEMU has always presented it's MAC address in
the device tree as an 8 byte value, even though PAPR requires it to be
6 bytes.  This is because, at the time, AIX required the value to be 8
bytes.  However, modern versions of AIX support the (correct) 6
byte value so they no longer require the workaround.

It would be neatest to always provide a 6 byte value but that would
cause a problem with old Linux kernel ibmveth drivers, so the old 8
byte value is still presented when necessary.

Since commit 13f85203e (3.10, May 2013) the driver has been able to
handle 6 or 8 byte addresses so versions after that don't need to be
considered specially.

Drivers from kernels before that can also handle either type of
address, but not always:
* If the first byte's lowest bits are 10, the address must be 6 bytes.
* Otherwise, the address must be 8 bytes.
(The two bits in question are significant in a MAC address: they
indicate a locally-administered unicast address.)

So to maintain compatibility the old 8 byte value is presented when
the lowest two bits of the first byte are not 10.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Igor Mammedov
c5514d0e4b machine: replace query_hotpluggable_cpus() callback with has_hotpluggable_cpus flag
Generic helper machine_query_hotpluggable_cpus() replaced
target specific query_hotpluggable_cpus() callbacks so
there is no need in it anymore. However inon NULL callback
value is used to detect/report hotpluggable cpus support,
therefore it can be removed completely.
Replace it with MachineClass.has_hotpluggable_cpus boolean
which is sufficient for the task.

Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
f2d672c248 machine: unify [pc_|spapr_]query_hotpluggable_cpus() callbacks
All callbacks FOO_query_hotpluggable_cpus() are practically
the same except of setting vcpus_count to different values.
Convert them to a generic machine_query_hotpluggable_cpus()
callback by moving vcpus_count initialization to per machine
specific callback possible_cpu_arch_ids().

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
535455fdee spapr: reuse machine->possible_cpus instead of cores[]
Replace SPAPR specific cores[] array with generic
machine->possible_cpus and store core objects there.
It makes cores bookkeeping similar to x86 cpus and
will allow to unify similar code.
It would allow to replace cpu_index based NUMA node
mapping with iproperty based one (for -device created
cores) since possible_cpus carries board defined
topology/layout.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
8aba384298 change CPUArchId.cpu type to Object*
so it could be reused for SPAPR cores as well

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
1ea69c0e25 pc: pass apic_id to pc_find_cpu_slot() directly so lookup could be done without CPU object
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
c67ae9333c pc: calculate topology only once when possible_cpus is initialised
Fill in CpuInstanceProperties once at board init time and
just copy them whenever query_hotpluggable_cpus() is called.
It will keep topology info always available without need
to recalculate it every time it's needed.
Considering it has NUMA node id, it will be used to keep
NUMA node to cpu mapping instead of numa_info[i].node_cpu
bitmasks.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
c96a1c0ba6 pc: move pcms->possible_cpus init out of pc_cpus_init()
possible_cpus could be initialized earlier then cpu objects,
i.e. when -smp is parsed so move init code to possible_cpu_arch_ids()
interface func and do initialization on the first call.

it should help later with making -numa cpu/-smp parsing a machine state
properties.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
38690a1ca7 machine: move possible_cpus to MachineState
so that it would be possible to reuse it with
spapr/virt-aarch64 targets.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Thomas Huth
fb38ebfbfe hw/pci-host/prep: Do not use hw_error() in realize function
hw_error() is for CPU related errors only (it prints out a
register dump and calls abort()), so we should not use it
if we just failed to load the bios image. Apart from that,
realize() functions should not exit directly but always set
the errp with error_setg() in case of errors instead.
Additionally, move some code around and delete the bios memory
subregion again in case of such an error, so that we leave a
clean state when returning to the caller.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
5065908361 target/ppc/POWER9: Direct all instr and data storage interrupts to the hypv
The vpm0 bit was removed from the LPCR in POWER9, this bit controlled
whether ISI and DSI interrupts were directed to the hypervisor or the
partition. These interrupts now go to the hypervisor irrespective, thus
it is no longer necessary to check the vmp0 bit in the LPCR.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
18aa49ecf4 target/ppc/POWER9: Adapt LPCR handling for POWER9
The logical partitioning control register controls a threads operation
based on the partition it is currently executing. Add new definitions and
update the mask used when writing to the LPCR based on the POWER9 spec.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
86cf1e9fe8 target/ppc/POWER9: Add ISAv3.00 MMU definition
POWER9 processors implement the mmu as defined in version 3.00 of the ISA.

Add a definition for this mmu model and set the POWER9 cpu model to use
this mmu model.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
7659ca1a3e target/ppc: Fix LPCR DPFD mask define
The DPFD field in the LPCR is 3 bits wide. This has always been defined
as 0x3 << shift which indicates a 2 bit field, which is incorrect.
Correct this.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
e0aee726bf target-ppc: Add xscvqpudz and xscvqpuwz instructions
xscvqpudz: VSX Scalar truncate & Convert Quad-Precision format to
           Unsigned Doubleword format
xscvqpuwz: VSX Scalar truncate & Convert Quad-Precision format to
           Unsigned Word format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
a8d411abac target-ppc: Implement round to odd variants of quad FP instructions
xsaddqpo:  VSX Scalar Add Quad-Precision using round to Odd
xsmulqo:   VSX Scalar Multiply Quad-Precision using round to Odd
xsdivqpo:  VSX Scalar Divide Quad-Precision using round to Odd
xscvqpdpo: VSX Scalar round & Convert Quad-Precision format to
           Double-Precision format using round to Odd
xssqrtqpo: VSX Scalar Square Root Quad-Precision using round to Odd
xssubqpo:  VSX Scalar Subtract Quad-Precision using round to Odd

In addition, fix the invalid bitmask in the instruction encoding
of xssqrtqp[o].

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
fd425037d2 softfloat: Add float128_to_uint32_round_to_zero()
float128_to_uint32_round_to_zero() is needed by xscvqpuwz instruction
of PowerPC ISA 3.0.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
2e6d856835 softfloat: Add float128_to_uint64_round_to_zero()
Implement float128_to_uint64() and use that to implement
float128_to_uint64_round_to_zero()

This is required by xscvqpudz instruction of PowerPC ISA 3.0.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
9ee6f678f4 softfloat: Add round-to-odd rounding mode
Power ISA 3.0 introduces a few quadruple precision floating point
instructions that support round-to-odd rounding mode. The
round-to-odd mode is explained as under:

Let Z be the intermediate arithmetic result or the operand of a convert
operation. If Z can be represented exactly in the target format, the
result is Z. Otherwise the result is either Z1 or Z2 whichever is odd.
Here Z1 and Z2 are the next larger and smaller numbers representable
in the target format respectively.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
5b929608b9 spapr: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
f4af7d4438 ppc4xx: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
5283c27fc5 mac99: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Sam Bobroff
2635531f20 target-ppc, tcg: fix usermode segfault with pthread_create()
Programs run under qemu-ppc64 on an x86_64 host currently segfault
if they use pthread_create() due to the adjustment made to the NIP in
commit bd6fefe71c.

This patch changes cpu_loop() to set the NIP back to the
pre-incremented value before calling do_syscall(), which causes the
correct address to be used for the new thread and corrects the fault.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
c09cec683b target-ppc: add wait instruction
Use the available wait instruction implementation.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
62d897ca8b target-ppc: add slbsync implementation
slbsync: SLB Synchoronize

The instruction provides an ordering function for the effects of all
slbieg instructions executed by the thread executing the slbsync
instruction.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
a63f1dfc62 target-ppc: add slbieg instruction
slbieg: SLB Invalidate Entry Global

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
80b8c1ee05 target-ppc: generate exception for copy/paste
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Balamuruhan S
a34011881c target-ppc: implement store atomic instruction
stwat: Store Word Atomic
stdat: Store Doubleword Atomic

The instruction includes as function code (5 bits) which gives a detail
on the operation to be performed. The patch implements five such
functions.

Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Signed-off-by: Harish S <harisrir@linux.vnet.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[ implement stdat, use macro and combine both implementation ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Balamuruhan S
a68a614673 target-ppc: implement load atomic instruction
lwat: Load Word Atomic
ldat: Load Doubleword Atomic

The instruction includes as function code (5 bits) which gives a detail
on the operation to be performed. The patch implements five such
functions.

Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Signed-off-by: Harish S <harisrir@linux.vnet.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[ combine both lwat/ldat implementation using macro ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Sam Bobroff
fe93e3e6ec spapr: fix off-by-one error in spapr_ovec_populate_dt()
The last byte of the option vector was missing due to an off-by-one
error. Without this fix, client architecture support negotiation will
fail because the last byte of option vector 5, which contains the MMU
support, will be missed.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Bharata B Rao
d4ccd87e68 target-ppc: Add xsmaxjdp and xsminjdp instructions
xsmaxjdp: VSX Scalar Maximum Type-J Double-Precision
xsminjdp: VSX Scalar Minimum Type-J Double-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Bharata B Rao
2770deede0 target-ppc: Add xsmaxcdp and xsmincdp instructions
xsmaxcdp: VSX Scalar Maximum Type-C Double-Precision
xsmincdp: VSX Scalar Minimum Type-C Double-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Thomas Huth
802fc7abd0 hw/ppc/pnv: Remove superfluous "qemu" prefix from error strings
error_report() already puts a prefix with the program name in front
of the error strings, so the "qemu:" prefix is not necessary here
anymore.

Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
f6b99afdc3 ppc: implement xssubqp instruction
xssubqp: VSX Scalar Subtract Quad-Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
a4a68476de ppc: implement xssqrtqp instruction
xssqrtqp: VSX Scalar Square Root Quad-Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
917950d7f5 ppc: implement xsrqpxp instruction
xsrqpxp: VSX Scalar Round Quad-Precision to Double-Extended Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
be07ad5842 ppc: implement xsrqpi[x] instruction
xsrqpi[x]: VSX Scalar Round to Quad-Precision Integer
[with Inexact].

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
115debf26c spapr: make cpu core unplug follow expected hotunplug call flow
spapr_core_unplug() were essentially spapr_core_unplug_request()
handler that requested CPU removal and registered callback
which did actual cpu core removali but it was called from
spapr_machine_device_unplug() which is intended for actual object
removal. Commit (cf632463 spapr: Memory hot-unplug support)
sort of fixed it introducing spapr_machine_device_unplug_request()
and calling spapr_core_unplug() but it hasn't renamed callback and
by mistake calls it from spapr_machine_device_unplug().

However spapr_machine_device_unplug() isn't ever called for
cpu core since spapr_core_release() doesn't follow expected
hotunplug call flow which is:
 1: device_del() ->
        hotplug_handler_unplug_request() ->
            set destroy_cb()
 2: destroy_cb() ->
        hotplug_handler_unplug() ->
            object_unparent // actual device removal

Fix it by renaming spapr_core_unplug() to spapr_core_unplug_request()
which is called from spapr_machine_device_unplug_request() and
making spapr_core_release() call hotplug_handler_unplug() which
will call spapr_machine_device_unplug() -> spapr_core_unplug()
to remove cpu core.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reveiwed-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
ff9006ddbf spapr: move spapr_core_[foo]plug() callbacks close to machine code in spapr.c
spapr_core_pre_plug/spapr_core_plug/spapr_core_unplug() are managing
wiring CPU core into spapr machine state and not internal CPU core state.
So move them from spapr_cpu_core.c to spapr.c where other similar
(spapr_memory_[foo]plug()) callbacks are located, which also matches
x86 target practice.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
f844616bf6 spapr: cpu core: separate child threads destruction from machine state operations
Split off destroying VCPU threads from drc callback
spapr_core_release() into new spapr_cpu_core_unrealizefn()
which takes care of internal cpu core state cleanup (i.e.
VCPU threads) and is called when object_unparent(core)
is called.

That leaves spapr_core_release() only with board mgmt
code, which will be moved to board related file in
follow up patch along with the rest on hotplug callbacks.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Paul Burton
d3473e147a hw/mips: MIPS Boston board support
Introduce support for emulating the MIPS Boston development board. The
Boston board is built around an FPGA & 3 PCIe controllers, one of which
is connected to an Intel EG20T Platform Controller Hub. It is used
during the development & debug of new CPUs and the software intended to
run on them, and is essentially the successor to the older MIPS Malta
board.

This patch does not implement the EG20T, instead connecting an already
supported ICH-9 AHCI controller. Whilst this isn't accurate it's enough
for typical stock Boston software (eg. Linux kernels) to work with hard
disks given that both the ICH-9 & EG20T implement the AHCI
specification.

Boston boards typically boot kernels in the FIT image format, and this
patch will treat kernels provided to QEMU as such. When loading a kernel
directly, the board code will generate minimal firmware much as the
Malta board code does. This firmware will set up the CM, CPC & GIC
register base addresses then set argument registers & jump to the kernel
entry point. Alternatively, bootloader code may be loaded using the bios
argument in which case no firmware will be generated & execution will
proceed from the start of the boot code at the default MIPS boot
exception vector (offset 0x1fc00000 into (c)kseg1).

Currently real Boston boards are always used with FPGA bitfiles that
include a Global Interrupt Controller (GIC), so the interrupt
configuration is only defined for such cases. Therefore the board will
only allow use of CPUs which implement the CPS components, including the
GIC, and will otherwise exit with a message.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
[yongbok.kim@imgtec.com:
  isolated boston machine support for mips64el.
  updated for recent Chardev changes.
  ignore missing bios/kernel for qtest.
  added default -drive to if=ide explicitly]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:30 +00:00
Paul Burton
62be393423 hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
Add support for emulating the Xilinx AXI Root Port Bridge for PCI
Express as described by Xilinx' PG055 document. This is a PCIe
controller that can be used with certain series of Xilinx FPGAs, and is
used on the MIPS Boston board which will make use of this code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  removed returning on !level,
  updated IRQ connection with GPIO logic,
  moved xilinx_pcie_init() to boston.c
  replaced stw_le_p() with pci_set_word()
  and other cosmetic changes]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:29 +00:00
Paul Burton
51b58561c1 loader: Support Flattened Image Trees (FIT images)
Introduce support for loading Flattened Image Trees, as used by modern
U-Boot. FIT images are essentially flattened device tree files which
contain binary images such as kernels, FDTs or ramdisks along with one
or more configuration nodes describing boot configurations.

The MIPS Boston board typically boots kernels in the form of FIT images,
and will make use of this code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  fixed potential memory leaks,
  isolated building option]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:47:40 +00:00
Paul Burton
6e85fce022 dtc: Update requirement to v1.4.2
In order to obtain fdt_first_subnode & fdt_next_subnode symbols from
libfdt for use by a later patch, bump the requirement for dtc to v1.4.2
& the submodule to that same version.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
bed9e5ceb1 target-mips: Provide function to test if a CPU supports an ISA
Provide a new cpu_supports_isa function which allows callers to
determine whether a CPU supports one of the ISA_ flags, by testing
whether the associated struct mips_def_t sets the ISA flags in its
insn_flags field.

An example use of this is to allow boards which generate bootloader code
to determine the properties of the CPU that will be used, for example
whether the CPU is 64 bit or which architecture revision it implements.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
2e2a1b4648 hw/mips_gic: Update pin state on mask changes
If the GIC interrupt mask is changed by a write to the smask (set mask)
or rmask (reset mask) registers, we need to re-evaluate the state of the
pins/IRQs fed to the CPU. Without doing so we risk leaving a pin high
despite the interrupt that led to that state being masked, or losing
interrupts if an already pending interrupt is unmasked.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
eb90ab9437 hw/mips_gictimer: provide API for retrieving frequency
Provide a new function mips_gictimer_get_freq() which returns the
frequency at which a GIC timer will count. This will be useful for
boards which perform setup based upon this frequency.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
08944be1d9 hw/mips_cmgcr: allow GCR base to be moved
Support moving the GCR base address & updating the CPU's CP0 CMGCRBase
register appropriately. This is required if a platform needs to move its
GCRs away from other memory, as the MIPS Boston development board does
to avoid its flash memory.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Peter Maydell
e295a154c2 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-hmp-20170221' into staging
HMP pull

Note, I had seen a fail in the vhost-user/flags-mismatch on one
host in one build, but not others with the same patches; and these patches
go nowhere near that, so I think that's a separate vhost-user issue.

# gpg: Signature made Tue 21 Feb 2017 18:49:25 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-hmp-20170221:
  monitor: Fix crashes when using HMP commands without CPU
  monitor: add poll-* properties into query-iothreads result
  hmp: fix block_set_io_throttle

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 19:41:57 +00:00
Thomas Huth
854e67fea6 monitor: Fix crashes when using HMP commands without CPU
When running certain HMP commands ("info registers", "info cpustats",
"info tlb", "nmi", "memsave" or dumping virtual memory) with the "none"
machine, QEMU crashes with a segmentation fault. This happens because the
"none" machine does not have any CPUs by default, but these HMP commands
did not check for a valid CPU pointer yet. Add such checks now, so we get
an error message about the missing CPU instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1484309555-1935-1-git-send-email-thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Pavel Hrdina
5fc00480ab monitor: add poll-* properties into query-iothreads result
IOthreads were recently extended by new properties that can
enable/disable and configure aio polling.  This will also allow
other tools that uses QEMU to probe for existence of those new
properties via query-qmp-schema.

Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Message-Id: <3163c16d6ab4257f7be9ad44fe9cc0ce8c359e5a.1486718555.git.phrdina@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Eric Blake
3f35c3b166 hmp: fix block_set_io_throttle
Commit 7a9877a made the 'device' parameter to BlockIOThrottle
optional, favoring 'id' instead.  But it forgot to update the
HMP usage to set has_device, which makes all attempts to change
throttling via HMP fail with "Need exactly one of 'device' and 'id'"

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170120230359.4244-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:00 +00:00
Peter Maydell
796b288f7b Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 21 Feb 2017 15:40:05 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  qemu-options: Fix broken sheepdog URL
  mirror: do not increase offset during initial zero_or_discard phase
  QAPI: Fix blockdev-add example documentation
  iscsi: Add blockdev-add support
  iscsi: Add timeout option
  iscsi: Add header-digest option
  iscsi: Add initiator-name option
  iscsi: Handle -iscsi user/password in bdrv_parse_filename()
  iscsi: Split URL into individual options

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 15:48:22 +00:00
Thomas Huth
6135c5e126 qemu-options: Fix broken sheepdog URL
The sheepdog URL is broken twice: First it uses a duplicated
http:// prefix, second the website seems to have moved to
https://sheepdog.github.io/sheepdog/ instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:09 -05:00
Anton Nefedov
90ab48eb07 mirror: do not increase offset during initial zero_or_discard phase
If explicit zeroing out before mirroring is required for the target image,
it moves the block job offset counter to EOF, then offset and len counters
count the image size twice. There is no harm but stats are confusing,
specifically the progress of the operation is always reported as 99% by
management tools.

The patch skips offset increase for the first "technical" pass over the
image. This should not cause any further harm.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1486045515-8009-1-git-send-email-den@openvz.org
CC: Jeff Cody <jcody@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:00 -05:00
Jeff Cody
b166099712 QAPI: Fix blockdev-add example documentation
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:46 -05:00
Kevin Wolf
31eb1202d3 iscsi: Add blockdev-add support
This adds blockdev-add support for iscsi devices.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:34 -05:00
Kevin Wolf
1d56010482 iscsi: Add timeout option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if an URL is given. New users are
supposed to use the new driver-specific option.

All -iscsi options have a corresponding driver-specific option for the
iscsi block driver now.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:26 -05:00
Kevin Wolf
81aa2a0fb5 iscsi: Add header-digest option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if an URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:16 -05:00
Kevin Wolf
d4e799292c iscsi: Add initiator-name option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if an URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:08 -05:00
Kevin Wolf
4317142020 iscsi: Handle -iscsi user/password in bdrv_parse_filename()
This splits the logic in the old parse_chap() function into a part that
parses the -iscsi options into the new driver-specific options, and
another part that actually applies those options (called apply_chap()
now).

Note that this means that username and password specified with -iscsi
only take effect when a URL is provided. This is intentional, -iscsi is
a legacy interface only supported for compatibility, new users should
use the proper driver-specific options.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:57 -05:00
Kevin Wolf
d5895fcb1d iscsi: Split URL into individual options
This introduces a .bdrv_parse_filename handler for iscsi which parses an
URL if given and translates it to individual options.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:34 -05:00
Peter Maydell
a1cf5fac2b Merge remote-tracking branch 'remotes/armbru/tags/pull-block-2017-02-21' into staging
Changes to -drive without if= and with if=scsi

# gpg: Signature made Tue 21 Feb 2017 12:22:35 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-block-2017-02-21:
  hw/i386: Deprecate -drive if=scsi with PC machine types
  hw: Deprecate -drive if=scsi with non-onboard HBAs
  hw/scsi: Concentrate -drive if=scsi auto-create in one place
  hw: Drop superfluous special checks for orphaned -drive
  blockdev: Make orphaned -drive fatal
  blockdev: Improve message for orphaned -drive
  hw/arm/highbank: Default -drive to if=ide instead of if=scsi
  hw: Default -drive to if=none instead of scsi when scsi cannot work
  hw: Default -drive to if=none instead of ide when ide cannot work
  hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
  hw: Default -drive to if=ide explicitly where it works

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 13:58:50 +00:00
Markus Armbruster
f778a82f0c hw/i386: Deprecate -drive if=scsi with PC machine types
The PC machines (pc-q35-* pc-i440fx-* pc-* isapc xenfv) automatically
create lsi53c895a SCSI HBAs and SCSI devices to honor -drive if=scsi.
For giggles, try -drive if=scsi,bus=25,media=cdrom --- this makes QEMU
create 25 of them.

lsi53c895a is thoroughly obsolete (PCI Ultra2 SCSI, ca. 2000), and
currently has no maintainer in QEMU.  megasas is a better choice,
except with old OSes that lack drivers.  virtio-scsi is a much better
choice when you have a driver, but only (newish) Linux comes with one
in the box.  There is no good default that works for all guests.

Encourage users to pick a non-obsolete SCSI HBA that works for them by
deprecating -drive if=scsi.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-4-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
a64aa5785d hw: Deprecate -drive if=scsi with non-onboard HBAs
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

Drives defined with if=scsi are also picked up by SCSI HBAs added with
-device, unlike other interface types.  Deprecate this usage, as follows.

Create the frontends for onboard HBAs in machine initialization code,
exactly like we do for if=ide and other interface types.  Change
scsi_legacy_handle_cmdline() to create a frontend only when it's still
missing, and warn that this usage is deprecated.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-3-git-send-email-armbru@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
fb8b660e17 hw/scsi: Concentrate -drive if=scsi auto-create in one place
The logic to create frontends for -drive if=scsi is in SCSI HBAs.  For
all other interface types, it's in machine initialization code.

A few machine types create the SCSI HBAs necessary for that.  That's
also not done for other interface types.

I'm going to deprecate these SCSI eccentricities.  In preparation for
that, create the frontends in main() instead of the SCSI HBAs, by
calling new function scsi_legacy_handle_cmdline() there.

Note that not all SCSI HBAs create frontends.  Take care not to change
that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-2-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
8f2d75e81d hw: Drop superfluous special checks for orphaned -drive
We've traditionally rejected orphans here and there, but not
systematically.  For instance, the sun4m machines have an onboard SCSI
HBA (bus=0), and have always rejected bus>0.  Other machines with an
onboard SCSI HBA don't.

Commit a66c9dc made all orphans trigger a warning, and the previous
commit turned this into an error.  The checks "here and there" are now
redundant.  Drop them.

Note that the one in mips_jazz.c was wrong: it rejected bus > MAX_FD,
but MAX_FD is the number of floppy drives per bus.

Error messages change from

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: Too many IDE buses defined (3 > 2)
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu: too many floppy drives
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu: too many SCSI bus

to

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: -drive if=ide,bus=2: machine type does not support if=ide,bus=2,unit=0
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu-system-mips64: -drive if=floppy,bus=2,id=fd1: machine type does not support if=floppy,bus=2,unit=0
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu-system-sparc: -drive if=scsi,bus=1: machine type does not support if=scsi,bus=1,unit=0

Cc: John Snow <jsnow@redhat.com>
Cc: "Hervé Poussineau" <hpoussin@reactos.org>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-9-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
720b8dc052 blockdev: Make orphaned -drive fatal
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

If machine initialization code doesn't comply, the block backend
remains unused.  This triggers a warning since commit a66c9dc, v2.2.0.
Drives created by default are exempted; use -nodefaults to get rid of
them.

Turn this warning into an error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-8-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
664cc623bf blockdev: Improve message for orphaned -drive
We warn when a -drive isn't supported by the machine type (commit
a66c9dc):

    $ qemu-system-x86_64 -S -display none -drive if=mtd
    Warning: Orphaned drive without device: id=mtd0,file=,if=mtd,bus=0,unit=0

Improve this to point to the offending bit of configuration:

    qemu-system-x86_64: -drive if=mtd: warning: machine type does not support if=mtd,bus=0,unit=0

Especially nice when it's hidden behind -readconfig foo.cfg:

    qemu-system-x86_64:foo.cfg:140: warning: machine type does not support if=mtd,bus=0,unit=0

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-7-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:40 +01:00
Markus Armbruster
2a7ae4ee50 hw/arm/highbank: Default -drive to if=ide instead of if=scsi
These machines have no onboard SCSI HBA, and no way to plug one.
-drive if=scsi therefore cannot work.  They do have an onboard IDE
controller (sysbus-ahci), but fail to honor if=ide.

Change their default to if=ide, and add a TODO comment on what needs
to be done to actually honor -drive if=ide.

Cc: Rob Herring <robh@kernel.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-6-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
7e465513c1 hw: Default -drive to if=none instead of scsi when scsi cannot work
Block backends defined with -drive if=scsi are meant to be picked up
by machine initialization code: a suitable frontend gets created and
wired up automatically.

if=scsi drives not picked up that way can still be used with -device
as if they had if=none, but that's unclean and best avoided.  Unused
ones produce an "Orphaned drive without device" warning.

A few machine types default to if=scsi, even though they don't
actually have a SCSI HBA.  This makes no sense.  Change their default
to if=none.  Affected machines:

* aarch64/arm: realview-pbx-a9 vexpress-a9 vexpress-a15 xilinx-zynq-a9

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-5-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
a27fa28f03 hw: Default -drive to if=none instead of ide when ide cannot work
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types implicitly default to if=ide that way, even though
they don't actually have an IDE controller.  This makes no sense.

Change the implicit default to if=none.  Affected machines:

* all targets: none
* aarch64/arm: akita ast2500 canon cheetah collie connex imx25
  integratorcp kzm lm3s6965evb lm3s811evb mainstone musicpal n800 n810
  netduino2 nuri palmetto realview romulus sabrelite smdkc210 sx1 sx1
  verdex z2
* cris: axis-dev88
* i386/x86_64: xenpv
* lm32: lm32-evr lm32-uclinux milkymist
* m68k: an5206 dummy mcf5208evb
* microblaze/microblazeel: petalogix-ml605 petalogix-s3adsp1800
* mips/mips64/mips64el/mipsel: mipssim
* moxie: moxiesim
* or32: or32-sim
* ppc/ppc64/ppcemb: bamboo ref405ep taihu virtex-ml507
* ppc/ppc64: mpc8544ds ppce500
* sh4/sh4eb: shix
* sparc: leon3_generic
* sparc64: niagara
* tricore: tricore_testboard
* unicore32: puv3
* xtensa/xtensaeb: kc705 lx200 lx60 ml605 sim

None of these machines have an IDE controller, let alone code to
honor if=ide.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Cc: Edgar E. Iglesias <edgar.iglesias@gmail.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: xen-devel@lists.xensource.com
Cc: Michael Walle <michael@walle.cc>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Anthony Green <green@moxielogic.com>
Cc: Jia Liu <proljc@gmail.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: qemu-ppc@nongnu.org
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Fabien Chouteau <chouteau@adacore.com>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Cc: Artyom Tarasenko <atar4qemu@gmail.com>
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-By: Artyom Tarasenko <atar4qemu@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-4-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
e0319b0302 hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
Machine types cubieboard, xlnx-ep108, xlnx-zcu102 have an onboard AHCI
controller, but neglect to set their MachineClass member
units_per_default_bus = 1.  This permits -drive if=ide,unit=1, which
makes no sense for AHCI.  It also screws up index=N for odd N, because
it gets desugared to unit=1,bus=N/2

Doesn't really matter, because these machine types fail to honor
-drive if=ide.  Add the missing units_per_default_bus = 1 anyway,
along with a TODO comment on what needs to be done for -drive if=ide.

Also set block_default_type = IF_IDE explicitly.  It's currently the
default, but the next commit will change it to something more
sensible, and we want to keep the IF_IDE default for these three
machines.  See also the previous commit.

Cc: Beniamino Galvani <b.galvani@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-3-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
2059839baa hw: Default -drive to if=ide explicitly where it works
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types default to if=ide, even though they don't actually
have an IDE controller.  A future patch will change these defaults to
something more sensible.  To prepare for it, this patch makes default
"ide" explicit for the machines that actually pick up if=ide drives:

* alpha: clipper
* arm/aarch64: spitz borzoi terrier tosa
* i386/x86_64: generic-pc-machine (with concrete subtypes pc-q35-*
  pc-i440fx-* pc-* isapc xenfv)
* mips64el: fulong2e
* mips/mipsel/mips64el: malta mips
* ppc/ppc64: mac99 g3beige prep
* sh4/sh4eb: r2d
* sparc64: sun4u sun4v

Note that ppc64 machine powernv already sets an "ide" default
explicitly.  Its IDE controller isn't implemented, yet.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-2-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Peter Maydell
a0775e28cd Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request

v2:
 * Rebased to resolve scsi conflicts

# gpg: Signature made Tue 21 Feb 2017 11:56:24 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request: (24 commits)
  coroutine-lock: make CoRwlock thread-safe and fair
  coroutine-lock: add mutex argument to CoQueue APIs
  coroutine-lock: place CoMutex before CoQueue in header
  test-aio-multithread: add performance comparison with thread-based mutexes
  coroutine-lock: add limited spinning to CoMutex
  coroutine-lock: make CoMutex thread-safe
  block: document fields protected by AioContext lock
  async: remove unnecessary inc/dec pairs
  aio-posix: partially inline aio_dispatch into aio_poll
  block: explicitly acquire aiocontext in aio callbacks that need it
  block: explicitly acquire aiocontext in bottom halves that need it
  block: explicitly acquire aiocontext in callbacks that need it
  block: explicitly acquire aiocontext in timers that need it
  aio: push aio_context_acquire/release down to dispatching
  qed: introduce qed_aio_start_io and qed_aio_next_io_cb
  blkdebug: reschedule coroutine on the AioContext it is running on
  coroutine-lock: reschedule coroutine on the AioContext it was running on
  nbd: convert to use qio_channel_yield
  io: make qio_channel_yield aware of AioContexts
  io: add methods to set I/O handlers on AioContext
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 11:58:03 +00:00
Paolo Bonzini
a7b91d35ba coroutine-lock: make CoRwlock thread-safe and fair
This adds a CoMutex around the existing CoQueue.  Because the write-side
can just take CoMutex, the old "writer" field is not necessary anymore.
Instead of removing it altogether, count the number of pending writers
during a read-side critical section and forbid further readers from
entering.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
1ace7ceac5 coroutine-lock: add mutex argument to CoQueue APIs
All that CoQueue needs in order to become thread-safe is help
from an external mutex.  Add this to the API.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
f8c6e1cbc3 coroutine-lock: place CoMutex before CoQueue in header
This will avoid forward references in the next patch.  It is also
more logical because CoQueue is not anymore the basic primitive.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
c05df34a87 test-aio-multithread: add performance comparison with thread-based mutexes
Add two implementations of the same benchmark as the previous patch,
but using pthreads.  One uses a normal QemuMutex, the other is Linux
only and implements a fair mutex based on MCS locks and futexes.
This shows that the slower performance of the 5-thread case is due to
the fairness of CoMutex, rather than to coroutines.  If fairness does
not matter, as is the case with two threads, CoMutex can actually be
faster than pthreads.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
480cff6322 coroutine-lock: add limited spinning to CoMutex
Running a very small critical section on pthread_mutex_t and CoMutex
shows that pthread_mutex_t is much faster because it doesn't actually
go to sleep.  What happens is that the critical section is shorter
than the latency of entering the kernel and thus FUTEX_WAIT always
fails.  With CoMutex there is no such latency but you still want to
avoid wait and wakeup.  So introduce it artificially.

This only works with one waiters; because CoMutex is fair, it will
always have more waits and wakeups than a pthread_mutex_t.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
fed20a70e3 coroutine-lock: make CoMutex thread-safe
This uses the lock-free mutex described in the paper '"Blocking without
Locking", or LFTHREADS: A lock-free thread library' by Gidenstam and
Papatriantafilou.  The same technique is used in OSv, and in fact
the code is essentially a conversion to C of OSv's code.

[Added missing coroutine_fn in tests/test-aio-multithread.c.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
91bcea4899 block: document fields protected by AioContext lock
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-19-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
bd451435c0 async: remove unnecessary inc/dec pairs
Pull the increment/decrement pair out of aio_bh_poll and into the
callers.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-18-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
a153bf52b3 aio-posix: partially inline aio_dispatch into aio_poll
This patch prepares for the removal of unnecessary lockcnt inc/dec pairs.
Extract the dispatching loop for file descriptor handlers into a new
function aio_dispatch_handlers, and then inline aio_dispatch into
aio_poll.

aio_dispatch can now become void.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-17-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
b9e413dd37 block: explicitly acquire aiocontext in aio callbacks that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-16-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
1919631e6b block: explicitly acquire aiocontext in bottom halves that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-15-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
9d45665448 block: explicitly acquire aiocontext in callbacks that need it
This covers both file descriptor callbacks and polling callbacks,
since they execute related code.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-14-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:36 +00:00
Paolo Bonzini
2f47da5f7f block: explicitly acquire aiocontext in timers that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-13-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
0836c72f70 aio: push aio_context_acquire/release down to dispatching
The AioContext data structures are now protected by list_lock and/or
they are walked with FOREACH_RCU primitives.  There is no need anymore
to acquire the AioContext for the entire duration of aio_dispatch.
Instead, just acquire it before and after invoking the callbacks.
The next step is then to push it further down.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-12-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
b20123a28b qed: introduce qed_aio_start_io and qed_aio_next_io_cb
qed_aio_start_io and qed_aio_next_io will not have to acquire/release
the AioContext, while qed_aio_next_io_cb will.  Split the functionality
and gain a little type-safety in the process.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-11-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
e5c67ab552 blkdebug: reschedule coroutine on the AioContext it is running on
Keep the coroutine on the same AioContext.  Without this change,
there would be a race between yielding the coroutine and reentering it.
While the race cannot happen now, because the code only runs from a single
AioContext, this will change with multiqueue support in the block layer.

While doing the change, replace custom bottom half with aio_co_schedule.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-10-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
a9d9235567 coroutine-lock: reschedule coroutine on the AioContext it was running on
As a small step towards the introduction of multiqueue, we want
coroutines to remain on the same AioContext that started them,
unless they are moved explicitly with e.g. aio_co_schedule.  This patch
avoids that coroutines switch AioContext when they use a CoMutex.
For now it does not make much of a difference, because the CoMutex
is not thread-safe and the AioContext itself is used to protect the
CoMutex from concurrent access.  However, this is going to change.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-9-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
ff82911cd3 nbd: convert to use qio_channel_yield
In the client, read the reply headers from a coroutine, switching the
read side between the "read header" coroutine and the I/O coroutine that
reads the body of the reply.

In the server, if the server can read more requests it will create a new
"read request" coroutine as soon as a request has been read.  Otherwise,
the new coroutine is created in nbd_request_put.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-8-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
c4c497d27f io: make qio_channel_yield aware of AioContexts
Support separate coroutines for reading and writing, and place the
read/write handlers on the AioContext that the QIOChannel is registered
with.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
bf88c1247f io: add methods to set I/O handlers on AioContext
This is in preparation for making qio_channel_yield work on
AioContexts other than the main one.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
934ebf48c0 test-thread-pool: use generic AioContext infrastructure
Once the thread pool starts using aio_co_wake, it will also need
qemu_get_current_aio_context().  Make test-thread-pool create
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
and tests/iothread.c can provide the rest.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
35f106e684 block-backend: allow blk_prw from coroutine context
qcow2_create2 calls this.  Do not run a nested event loop, as that
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
list of the currently running one.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
0c330a734b aio: introduce aio_co_schedule and aio_co_wake
aio_co_wake provides the infrastructure to start a coroutine on a "home"
AioContext.  It will be used by CoMutex and CoQueue, so that coroutines
don't jump from one context to another when they go to sleep on a
mutex or waitqueue.  However, it can also be used as a more efficient
alternative to one-shot bottom halves, and saves the effort of tracking
which AioContext a coroutine is running on.

aio_co_schedule is the part of aio_co_wake that starts a coroutine
on a remove AioContext, but it is also useful to implement e.g.
bdrv_set_aio_context callbacks.

The implementation of aio_co_schedule is based on a lock-free
multiple-producer, single-consumer queue.  The multiple producers use
cmpxchg to add to a LIFO stack.  The consumer (a per-AioContext bottom
half) grabs all items added so far, inverts the list to make it FIFO,
and goes through it one item at a time until it's empty.  The data
structure was inspired by OSv, which uses it in the very code we'll
"port" to QEMU for the thread-safe CoMutex.

Most of the new code is really tests.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
c2b38b277a block: move AioContext, QEMUTimer, main-loop to libqemuutil
AioContext is fairly self contained, the only dependency is QEMUTimer but
that in turn doesn't need anything else.  So move them out of block-obj-y
to avoid introducing a dependency from io/ to block-obj-y.

main-loop and its dependency iohandler also need to be moved, because
later in this series io/ will call iohandler_get_aio_context.

[Changed copyright "the QEMU team" to "other QEMU contributors" as
suggested by Daniel Berrange and agreed by Paolo.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Peter Maydell
b856256179 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170221-1' into staging
xhci: add qemu-xhci device, some followup cleanups.
ccid: better sanity checking.
ehci: fix memory leak
ohci: bugfixes.

# gpg: Signature made Tue 21 Feb 2017 07:14:35 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170221-1:
  usb-ccid: add check message size checks
  usb-ccid: move header size check
  usb-ccid: better bulk_out error handling
  xhci: drop via vendor command handling
  xhci: fix nec vendor quirk handling
  xhci: add qemu xhci controller
  xhci: drop ER_FULL_HACK workaround
  xhci: apply limits to loops
  usb: ohci: limit the number of link eds
  usb: ohci: fix error return code in servicing iso td
  usb: ehci: fix memory leak in ehci

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 09:35:15 +00:00
Gerd Hoffmann
31fb4444a4 usb-ccid: add check message size checks
Check message size too when figuring whenever we should expect more data.
Fix debug message to show useful data, p->iov.size is fixed anyway if we
land there, print how much we got meanwhile instead.

Also check announced message size against actual message size.  That
is a more general fix for CVE-2017-5898 than commit "c7dfbf3 usb: ccid:
check ccid apdu length".

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
7569c54642 usb-ccid: move header size check
Move up header size check, so we can use header fields in sanity checks
(in followup patches).  Also reword the debug message.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
0aeebc73b7 usb-ccid: better bulk_out error handling
Add err goto label where we can jump to from all error conditions.
STALL request on all errors.  Reset position on all errors.

Normal request processing is not in a else branch any more, so this code
is reintended, there are no code changes in that part of the code
though.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
558ff1b6ef xhci: drop via vendor command handling
Seems pretty pointless, we don't emulate an via xhci controller.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-5-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
2992d6b49c xhci: fix nec vendor quirk handling
Only the TYPE_NEC_XHCI controller will have the nec vendor quirks.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
72a810f411 xhci: add qemu xhci controller
Turn existing TYPE_XHCI into an abstract base class.
Create two child classes, TYPE_NEC_XHCI (same name as old xhci
controller) and TYPE_QEMU_XHCI (using an ID from our namespace).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Message-id: 1486382139-30630-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
898248a329 xhci: drop ER_FULL_HACK workaround
The nec/renesas driver problems have finally been debugged and root
caused, see commit "7da76e1 xhci: fix event queue IRQ handling".

It's pretty clear now that
 (a) The whole "driver can't handle ring full" story is most likely
     wrong.
 (b) The ER_FULL_HACK workaround based on the false assumtion doesn't
     much.  It avoids the driver crashing (without commit 7da76e1), but
     it doesn't make usb work.
 (c) With 7da76e1 applied it doesn't trigger any more.

So, lets kill it.  Or, to be exact, lets almost kill it.  Some data
fields are kept unused in the state struct, for live migration backward
compatibility.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
f89b60f6e5 xhci: apply limits to loops
Limits should be big enough that normal guest should not hit it.
Add a tracepoint to log them, just in case.  Also, while being
at it, log the existing link trb limit too.

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486383669-6421-1-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Li Qiang
95ed56939e usb: ohci: limit the number of link eds
The guest may builds an infinite loop with link eds. This patch
limit the number of linked ed to avoid this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899a02e.45ca240a.6c373.93c1@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
26f670a244 usb: ohci: fix error return code in servicing iso td
It should return 1 if an error occurs when reading iso td.
This will avoid an infinite loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899ac3e.1033240a.944d5.9a2d@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
d710e1e7bd usb: ehci: fix memory leak in ehci
In usb_ehci_init function, it initializes 's->ipacket', but there
is no corresponding function to free this. As the ehci can be hotplug
and unplug, this will leak host memory leak. In order to make the
hierarchy clean, we should add a ehci pci finalize function, then call
the clean function in ehci device.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 589a85b8.3c2b9d0a.b8e6.1434@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Peter Maydell
56f9e46b84 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-20' into staging
QAPI patches for 2017-02-20

# gpg: Signature made Mon 20 Feb 2017 13:31:12 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-20:
  Makefile: Put VERSION info into version.texi rather than using -D
  qapi2texi: replace quotation by bold section name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 17:42:47 +00:00
Peter Maydell
c8f21dbfc3 Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170220-1' into staging
ui: opengl fixes, for spice and egl-helpers.

# gpg: Signature made Mon 20 Feb 2017 13:12:46 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170220-1:
  egl-helpers: Support newer MESA versions
  spice: allow to specify drm rendernode

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 16:31:38 +00:00
Peter Maydell
6753e4ed15 Merge remote-tracking branch 'remotes/kraxel/tags/pull-input-20170220-1' into staging
input: add wctablet, ps2 fix

# gpg: Signature made Mon 20 Feb 2017 11:42:12 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-input-20170220-1:
  Add wctablet device
  ps2: fix mouse mappings for right/middle button

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 13:38:34 +00:00
Peter Maydell
fea346f569 Makefile: Put VERSION info into version.texi rather than using -D
Unfortunately some older versions of makeinfo don't correctly
handle the -D command line option and fail to set the variable.
This then causes them to complain
 docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION

Work around this by doing as the autotools do, and writing
the information into a version.texi file which we then
include from the .texi files that need it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487357968-31000-1-git-send-email-peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:11:07 +01:00
Marc-André Lureau
1ede77dfd2 qapi2texi: replace quotation by bold section name
When we build qemu-qmp-ref.txt this causes texinfo to complain several
times:
"Negative repeat count does nothing at
/usr/share/texinfo/Texinfo/Convert/Line.pm line 124."

It also doesn't display correctly, because the "Notes" text disappears
entirely in the HTML version because it thinks there's no actual
quotation text.

The text file output formatting is also not good.

To solve those problems, remove usage of @quotation, and simply use bold
face for the section name.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170217093416.27688-1-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:10:46 +01:00
Peter Maydell
5d42ff913b Merge remote-tracking branch 'remotes/huth/tags/coldfire-20170219' into staging
Updates for the m68k ColdFire machines:
- Remove the obsolete dummy machine
- QOMify the ColdFire interrupt controller
- Volunteer for maintaining the orphan ColdFire boards

# gpg: Signature made Sat 18 Feb 2017 23:08:55 GMT
# gpg:                using RSA key 0x2ED9D774FE702DB5
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>"
# gpg:                 aka "Thomas Huth <thuth@redhat.com>"
# gpg:                 aka "Thomas Huth <huth@tuxfamily.org>"
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3  EAB9 2ED9 D774 FE70 2DB5

* remotes/huth/tags/coldfire-20170219:
  MAINTAINERS: Add odd fixer for the ColdFire boards
  hw/m68k: QOMify the ColdFire interrupt controller
  hw/m68k: Remove dummy machine

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 11:55:37 +00:00
Frediano Ziglio
0ea1523fb6 egl-helpers: Support newer MESA versions
According to
https://www.khronos.org/registry/EGL/extensions/MESA/EGL_MESA_platform_gbm.txt
if MESA_platform_gbm is supported display should be initialized
from a GBM handle using eglGetPlatformDisplayEXT.

Signed-off-by: Frediano Ziglio <fziglio@redhat.com>
Message-id: 20170220095055.4234-1-fziglio@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:46:09 +01:00
Marc-André Lureau
7b5255083b spice: allow to specify drm rendernode
When multiple GPU are available, picking the first one isn't always the
best choice. Learn to specify a device rendernode.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170212112118.16044-1-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:44:32 +01:00
Anatoli Huseu1
378af96155 Add wctablet device
Add QEMU Wacom Penpartner serial tablet emulation.
GSoC 2016 project.

Signed-off-by: Anatoli Huseu1 <avg.tolik@gmail.com>

Various cleanups.
Add line speed tracking.
Implement ST and SP commands.
Adapted to chardev QOMification.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486391007-10116-1-git-send-email-kraxel@redhat.com
2017-02-20 11:26:28 +01:00
Fabian Lesniak
ed6f72b827 ps2: fix mouse mappings for right/middle button
Commit 8b0caab0 ("ps2: add support for mice with extra/side buttons")
accidentally swapped right and middle mouse buttons. This commit corrects
the mapping as expected by the ps2 controller.

Signed-off-by: Fabian Lesniak <fabian@lesniak-it.de>
Message-id: 20170204150319.8907-1-fabian@lesniak-it.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 11:25:38 +01:00
Peter Maydell
d514cfd763 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pci: fixes, features

virtio is using region caches for performance
iommu support for IOTLBs
misc fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 17 Feb 2017 19:53:02 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (23 commits)
  intel_iommu: vtd_slpt_level_shift check level
  intel_iommu: convert dbg macros to trace for trans
  intel_iommu: convert dbg macros to traces for inv
  intel_iommu: renaming gpa to iova where proper
  intel_iommu: simplify irq region translation
  intel_iommu: add "caching-mode" option
  vfio: allow to notify unmap for very large region
  vfio: introduce vfio_get_vaddr()
  vfio: trace map/unmap for notify as well
  pcie: simplify pcie_add_capability()
  virtio: Fix no interrupt when not creating msi controller
  virtio: use VRingMemoryRegionCaches for avail and used rings
  virtio: check for vring setup in virtio_queue_update_used_idx
  virtio: use VRingMemoryRegionCaches for descriptor ring
  virtio: add MemoryListener to cache ring translations
  virtio: use MemoryRegionCache to access descriptors
  exec: make address_space_cache_destroy idempotent
  virtio: use address_space_map/unmap to access descriptors
  virtio: add virtio_*_phys_cached
  memory: make memory_listener_unregister idempotent
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 09:53:59 +00:00
Thomas Huth
5baf2741b4 MAINTAINERS: Add odd fixer for the ColdFire boards
I did some work with real ColdFire boards in the past, and after
QOMifying most of the ColdFire devices recently, I feel confident
that I could at least take care of odd fixes for these boards.

Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
Thomas Huth
88b86983f3 hw/m68k: QOMify the ColdFire interrupt controller
Use type_init() and friends to adapt the ColdFire interrupt
controller to the latest QEMU device conventions.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
Thomas Huth
22f2dbe7ea hw/m68k: Remove dummy machine
Since it is now possible to instantiate a CPU and RAM with the "none"
machine, too, and a kernel can be loaded there with the generic loader
device, there is no more need for the m68k "dummy" machine. Thus let's
remove this unmaintained file now.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:25 +01:00
Peter Xu
7e58326ad7 intel_iommu: vtd_slpt_level_shift check level
This helps in debugging incorrect level passed in.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
6c441e1d61 intel_iommu: convert dbg macros to trace for trans
Another patch to convert the DPRINTF() stuffs. This patch focuses on the
address translation path and caching.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
bc535e59c4 intel_iommu: convert dbg macros to traces for inv
VT-d codes are still using static DEBUG_INTEL_IOMMU macro. That's not
good, and we should end the day when we need to recompile the code
before getting useful debugging information for vt-d. Time to switch to
the trace system. This is the first patch to do it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
6e9055641b intel_iommu: renaming gpa to iova where proper
There are lots of places in current intel_iommu.c codes that named
"iova" as "gpa". It is really confusing to use a name "gpa" in these
places (which is very easily to be understood as "Guest Physical
Address", while it's not). To make the codes (much) easier to be read, I
decided to do this once and for all.

No functional change is made. Only literal ones.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
046ab7e9be intel_iommu: simplify irq region translation
Now we have a standalone memory region for MSI, all the irq region
requests should be redirected there. Cleaning up the block with an
assertion instead.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Aviv Ben-David
3b40f0e53c intel_iommu: add "caching-mode" option
This capability asks the guest to invalidate cache before each map operation.
We can use this invalidation to trap map operations in the hypervisor.

Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
[peterx: using "caching-mode" instead of "cache-mode" to align with spec]
[peterx: re-write the subject to make it short and clear]
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
dfbd90e5b9 vfio: allow to notify unmap for very large region
Linux vfio driver supports to do VFIO_IOMMU_UNMAP_DMA for a very big
region. This can be leveraged by QEMU IOMMU implementation to cleanup
existing page mappings for an entire iova address space (by notifying
with an IOTLB with extremely huge addr_mask). However current
vfio_iommu_map_notify() does not allow that. It make sure that all the
translated address in IOTLB is falling into RAM range.

The check makes sense, but it should only be a sensible checker for
mapping operations, and mean little for unmap operations.

This patch moves this check into map logic only, so that we'll get
faster unmap handling (no need to translate again), and also we can then
better support unmapping a very big region when it covers non-ram ranges
or even not-existing ranges.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
4a4b88fbe1 vfio: introduce vfio_get_vaddr()
A cleanup for vfio_iommu_map_notify(). Now we will fetch vaddr even if
the operation is unmap, but it won't hurt much.

One thing to mention is that we need the RCU read lock to protect the
whole translation and map/unmap procedure.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
3213835720 vfio: trace map/unmap for notify as well
We traces its range, but we don't know whether it's a MAP/UNMAP. Let's
dump it as well.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
d4e9b75aa0 pcie: simplify pcie_add_capability()
When we add PCIe extended capabilities, we should be following the rule
that we add the head extended cap (at offset 0x100) first, then the rest
of them. Meanwhile, we are always adding new capability bits at the end
of the list. Here the "next" looks meaningless in all cases since it
should always be zero (along with the "header").

Simplify the function a bit, and it looks more readable now.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Michael S. Tsirkin
b4b9862b53 virtio: Fix no interrupt when not creating msi controller
For ARM virt machine, if we use virt-2.7 which will not create ITS node,
the virtio-net can not recieve interrupts so it can't get ip address
through dhcp.
This fixes commit 83d768b(virtio: set ISR on dataplane notifications).

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
97cd965c07 virtio: use VRingMemoryRegionCaches for avail and used rings
The virtio-net change is necessary because it uses virtqueue_fill
and virtqueue_flush instead of the more convenient virtqueue_push.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
ca0176ad83 virtio: check for vring setup in virtio_queue_update_used_idx
If the vring has not been set up, it is not necessary for vring_used_idx
to do anything (as is already the case when the caller is virtio_load).
This is harmless for now, but it will be a problem when the
MemoryRegionCache has not been set up.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
991976f751 virtio: use VRingMemoryRegionCaches for descriptor ring
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
c611c76417 virtio: add MemoryListener to cache ring translations
The cached translations are RCU-protected to allow efficient use
when processing virtqueues.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
5eba0404b9 virtio: use MemoryRegionCache to access descriptors
For now, the cache is created on every virtqueue_pop.  Later on,
direct descriptors will be able to reuse it.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
91047df38d exec: make address_space_cache_destroy idempotent
Clear cache->mr so that address_space_cache_destroy does nothing
the second time it is called.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
9796d0ac8f virtio: use address_space_map/unmap to access descriptors
This makes little difference, but it makes the code change smaller
for the next patch that introduces MemoryRegionCache.  This is
because map/unmap are similar to MemoryRegionCache init/destroy.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
e6a830d6eb virtio: add virtio_*_phys_cached
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
1d8280c18f memory: make memory_listener_unregister idempotent
Make it easy to unregister a MemoryListener without tracking whether it
had been registered before.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Haozhong Zhang
79c0f397fe docs: add document to explain the usage of vNVDIMM
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Fam Zheng
0793169870 virtio: Report real progress in VQ aio poll handler
In virtio_queue_host_notifier_aio_poll, not all "!virtio_queue_empty()"
cases are making true progress.

Currently the offending one is virtio-scsi event queue, whose handler
does nothing if no event is pending. As a result aio_poll() will spin on
the "non-empty" VQ and take 100% host CPU.

Fix this by reporting actual progress from virtio queue aio handlers.

Reported-by: Ed Swierk <eswierk@skyportsystems.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Tested-by: Ed Swierk <eswierk@skyportsystems.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Michael S. Tsirkin
4bb571d857 pci/pcie: don't assume cap id 0 is reserved
VFIO actually wants to create a capability with ID == 0.
This is done to make guest drivers skip the given capability.
pcie_add_capability then trips up on this capability
when looking for end of capability list.

To support this use-case, it's easy enough to switch to
e.g. 0xffffffff for these comparisons - we can be sure
it will never match a 16-bit capability ID.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-17 21:52:30 +02:00
Peter Maydell
ad584d37f2 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* GUEST_PANICKED improvements (Anton)
* vCont gdbstub rewrite (Claudio)
* Fix CPU creation with -device (Liyang)
* Logging fixes for pty chardevs (Ed)
* Makefile "move if changed" fix (Lin)
* First part of cpu_exec refactoring (me)
* SVM emulation fix (me)
* apic_delivered fix (Pavel)
* "info ioapic" fix (Peter)
* qemu-nbd socket activation (Richard)
* QOMification of mcf_uart (Thomas)

# gpg: Signature made Thu 16 Feb 2017 17:37:31 GMT
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (23 commits)
  target-i386: correctly propagate retaddr into SVM helpers
  vl: log available guest crash information
  report guest crash information in GUEST_PANICKED event
  i386/cpu: add crash-information QOM property
  Makefile: avoid leaving the temporary QEMU_PKGVERSION header file
  vl: Move the cpu_synchronize_all_post_init() after generic devices initialization
  qemu-nbd: Implement socket activation.
  qemu-doc: Clarify that -vga std is now the default
  cpu-exec: remove outermost infinite loop
  cpu-exec: avoid repeated sigsetjmp on interrupts
  cpu-exec: avoid cpu_loop_exit in cpu_handle_interrupt
  cpu-exec: tighten barrier on TCG_EXIT_REQUESTED
  cpu-exec: fix icount out-of-bounds access
  hw/char/mcf_uart: QOMify the ColdFire UART
  gdbstub: Fix vCont behaviour
  move vm_start to cpus.c
  char: drop data written to a disconnected pty
  apic: reset apic_delivered global variable on machine reset
  qemu-char: socket backend: disconnect on write error
  test-vmstate: remove yield_until_fd_readable
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-16 17:46:52 +00:00
Paolo Bonzini
65c9d60a3a target-i386: correctly propagate retaddr into SVM helpers
Commit 2afbdf8 ("target-i386: exception handling for memory helpers",
2015-09-15) changed tlb_fill's cpu_restore_state+raise_exception_err
to raise_exception_err_ra.  After this change, the cpu_restore_state
and raise_exception_err's cpu_loop_exit are merged into
raise_exception_err_ra's cpu_loop_exit_restore.

This actually fixed some bugs, but when SVM is enabled there is a
second path from raise_exception_err_ra to cpu_loop_exit.  This is
the VMEXIT path, and now cpu_vmexit is called without a
cpu_restore_state before.

The fix is to pass the retaddr to cpu_vmexit (via
cpu_svm_check_intercept_param).  All helpers can now use GETPC() to pass
the correct retaddr, too.

Cc: qemu-stable@nongnu.org
Fixes: 2afbdf8480
Reported-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Tested-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 18:37:01 +01:00
Peter Maydell
7a37b59f1d Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-upstream-pull-request' into staging
# gpg: Signature made Thu 16 Feb 2017 14:35:46 GMT
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier2/tags/linux-user-for-upstream-pull-request:
  linux-user: Add FICLONE and FICLONERANGE ioctls
  linux-user: Use correct types in load_symbols()
  linux-user: fill target sigcontext struct accordingly
  linux-user: fix tcg/mmap test
  linux-user: fix settime old value location
  linux-user: Update m68k syscall definitions to match Linux 4.6
  linux-user: Update sh4 syscall definitions to match Linux 4.8
  linux-user: manage two new IFLA host message types
  linux-user: Fix mq_open
  linux-user: Fix readahead
  linux-user: Fix inotify_init1 support
  linux-user: Fix s390x safe-syscall for z900
  linux-user: drop __cygwin__ ifdef
  linux-user: remove ifdef __USER_MISC

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-16 15:03:28 +00:00
Anton Nefedov
f47291b7a7 vl: log available guest crash information
There is a suitable log mask for the purpose.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-Id: <1487053524-18674-4-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Anton Nefedov
c86f106b85 report guest crash information in GUEST_PANICKED event
it's not very convenient to use the crash-information property interface,
so provide a CPU class callback to get the guest crash information, and pass
that information in the event

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-Id: <1487053524-18674-3-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Anton Nefedov
d187e08dc4 i386/cpu: add crash-information QOM property
Windows reports BSOD parameters through Hyper-V crash MSRs. This
information is very useful for initial crash analysis and thus
it would be nice to have a way to fetch it.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-Id: <1487053524-18674-2-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
d9e73d32a8 Makefile: avoid leaving the temporary QEMU_PKGVERSION header file
By commit 67a1de0d, When we perform 'git pull && make && sudo make install',
In 'make' stage a qemu-version.h.tmp will be generated. If the content of
qemu-version.h.tmp and qemu-version.h aren't consistent, The qemu-version.h.tmp
will be renamed to qemu-version.h. Because of the target FORCE, The same action
will be do again in 'make install' stage.

In 'make install' stage, If there is no qemu-version.h.tmp exists and we run
'make install' with sudo, The owner and group of new qemu-version.h.tmp will be
privileged user/group. When we run 'make' next time, qemu-version.h.tmp can't
be overwritten because of permission issue.

This patch removed qemu-version.h.tmp after build to fix this issue.

Signed-off-by: Lin Ma <lma@suse.com>
Message-Id: <20170215024030.23895-1-lma@suse.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Dou Liyang
3741c2503b vl: Move the cpu_synchronize_all_post_init() after generic devices initialization
At the Qemu initialization, we call the cpu_synchronize_all_post_init()
to synchronize All CPU states to KVM in the ./vl.c::main().

Currently, it is called before we initialize the CPUs, which is created
by "-device" command and parsed by generic devices initialization, So,
these CPUs may be ignored to synchronize.

The patch moves the cpu_synchronize_all_post_init func after generic
devices initialization to make sure that all the CPUs can be included.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Message-Id: <1485916178-17838-1-git-send-email-douly.fnst@cn.fujitsu.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Acked-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:49 +01:00
Richard W.M. Jones
a721f53b8f qemu-nbd: Implement socket activation.
Socket activation (sometimes known as systemd socket activation)
allows an Internet superserver to pass a pre-opened listening socket
to the process, instead of having qemu-nbd open a socket itself.  This
is done via the LISTEN_FDS and LISTEN_PID environment variables, and a
standard file descriptor range.

This change partially implements socket activation for qemu-nbd.  If
the environment variables are set correctly, then socket activation
will happen automatically, otherwise everything works as before.  The
limitation is that LISTEN_FDS must be 1.

Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
Message-Id: <20170204100317.32425-2-rjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 15:30:45 +01:00
Helge Deller
21992cb679 linux-user: Add FICLONE and FICLONERANGE ioctls
Add missing FICLONE and FICLONERANGE ioctls.

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170211222602.GA6399@ls3530.fritz.box>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
Peter Maydell
1e06262da6 linux-user: Use correct types in load_symbols()
Coverity doesn't like the code in load_symbols() which assumes
it can use 'int' for a variable that might hold an offset into
the guest ELF file, because in a 64-bit guest that could
overflow. Guest binaries with 2GB sections aren't very likely
and this isn't a security issue because we fully trust the
guest linux-user binary anyway, but we might as well use the
right types, which will placate Coverity. Use uint64_t to
hold section sizes, and bail out if the symbol table is too
large rather than just overflowing an int.

(Coverity issue CID1005776)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1486249533-5260-1-git-send-email-peter.maydell@linaro.org>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
Jose Ricardo Ziviani
26920a2961 linux-user: fill target sigcontext struct accordingly
A segfault is noticed when an emulated program uses any of ucontext
regs fields. Risu detected this issue in the following operation when
handling a signal:
  ucontext_t *uc = (ucontext_t*)uc;
  uc->uc_mcontext.regs->nip += 4;

but this works fine:
  uc->uc_mcontext.gp_regs[PT_NIP] += 4;

This patch set regs to a valid location as well as other sigcontext
fields.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <1485900317-3256-1-git-send-email-joserz@linux.vnet.ibm.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
Marc-André Lureau
35f2fd04ce linux-user: fix tcg/mmap test
tests/tcg/mmap test fails with values other than default target page
size. When creating a map beyond EOF, extra anonymous pages are added up
to the target page boundary. Currently, this operation is performed only
when qemu_real_host_page_size < TARGET_PAGE_SIZE, but it should be
performed if the configured page size (qemu -p) is larger than
qemu_real_host_page_size too.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[pranith: dropped checkpatch changes]
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170119151533.29328-2-bobby.prani@gmail.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
Marc-André Lureau
40c80b5e9e linux-user: fix settime old value location
old_value is the 4th argument of timer_settime(), not the 2nd.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170119151533.29328-1-bobby.prani@gmail.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:30 +01:00
John Paul Adrian Glaubitz
23d208ce6d linux-user: Update m68k syscall definitions to match Linux 4.6
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170116224915.19430-2-glaubitz@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:26 +01:00
John Paul Adrian Glaubitz
3148ff8404 linux-user: Update sh4 syscall definitions to match Linux 4.8
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170116223140.18634-2-glaubitz@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-16 15:29:16 +01:00
Alberto Garcia
41eeb0e601 qemu-doc: Clarify that -vga std is now the default
The QEMU manual page states that Cirrus Logic is the default video
card if the user doesn't specify any. However this is not true since
QEMU 2.2.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-Id: <20170127094154.19778-1-berto@igalia.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
4515e58d60 cpu-exec: remove outermost infinite loop
Reorganize the sigsetjmp so that the restart case falls through
to cpu_handle_exception and the execution loop.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
a42cf3f3f2 cpu-exec: avoid repeated sigsetjmp on interrupts
The sigsetjmp only needs to be prepared once for the whole execution
of cpu_exec.  This patch takes care of the "== 0" side, using a
nested loop so that cpu_handle_interrupt goes straight back to
cpu_handle_exception without doing another sigsetjmp.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
209b71b60e cpu-exec: avoid cpu_loop_exit in cpu_handle_interrupt
The siglongjmp goes straight back to the beginning of cpu_exec's
outermost loop.  We do not need a siglongjmp, we can simply
leave the inner TB execution loop.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
a70fe14b7d cpu-exec: tighten barrier on TCG_EXIT_REQUESTED
This seems to have worked just fine so far on weakly-ordered
architectures, but I don't see anything that prevents the
reordering from:

    store 1 to exit_request
    store 1 to tcg_exit_req
                                 load tcg_exit_req
                                 store 0 to tcg_exit_req
                                 load exit_request
                                 store 0 to exit_request
    store 1 to exit_request
    store 1 to tcg_exit_req

to this:

    store 1 to exit_request
    store 1 to tcg_exit_req
                                 load tcg_exit_req
                                 load exit_request
    store 1 to exit_request
    store 1 to tcg_exit_req
                                 store 0 to tcg_exit_req
                                 store 0 to exit_request

therefore losing a request.  It's possible that other memory barriers
(e.g. in rcu_read_unlock) are hiding it, but better safe than
sorry.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Paolo Bonzini
43d70ddf9f cpu-exec: fix icount out-of-bounds access
When icount is active, tb_add_jump is surprisingly called with an
out of bounds basic block index.  I have no idea how that can work,
but it does not seem like a good idea.  Clear *last_tb for all
TB_EXIT_ICOUNT_EXPIRED cases, even when all you have to do is
refill icount_extra.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Thomas Huth
d9ff1d35c5 hw/char/mcf_uart: QOMify the ColdFire UART
Use type_init() etc. to adapt the ColdFire UART
to the latest QEMU device conventions.

Signed-off-by: Thomas Huth <huth@tuxfamily.org>
Message-Id: <1485586582-6490-1-git-send-email-huth@tuxfamily.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Claudio Imbrenda
544177ad1c gdbstub: Fix vCont behaviour
When GDB issues a "vCont", QEMU was not handling it correctly when
multiple VCPUs are active.
For vCont, for each thread (VCPU), it can be specified whether to
single step, continue or stop that thread. The default is to stop a
thread.
However, when (for example) "vCont;s:2" is issued, all VCPUs continue
to run, although all but VCPU nr 2 are to be stopped.

This patch completely rewrites the vCont parsing code.

Please note that this improvement only works in system emulation mode,
when in userspace emulation mode the old behaviour is preserved.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Message-Id: <1487092068-16562-3-git-send-email-imbrenda@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:56 +01:00
Claudio Imbrenda
2d76e82395 move vm_start to cpus.c
This patch:

* moves vm_start to cpus.c.
* exports qemu_vmstop_requested, since it's needed by vm_start.
* extracts vm_prepare_start from vm_start; it does what vm_start did,
  except restarting the cpus.
* vm_start now calls vm_prepare_start and then restarts the cpus.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Message-Id: <1487092068-16562-2-git-send-email-imbrenda@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Ed Swierk
1c64fdbc81 char: drop data written to a disconnected pty
When a serial port writes data to a pty that's disconnected, drop the
data and return the length dropped. This avoids triggering pointless
retries in callers like the 16550A serial_xmit(), and causes
qemu_chr_fe_write() to write all data to the log file, rather than
logging only while a pty client like virsh console happens to be
connected.

Signed-off-by: Ed Swierk <eswierk@skyportsystems.com>
Message-Id: <1485870329-79428-1-git-send-email-eswierk@skyportsystems.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Pavel Dovgalyuk
f65e821262 apic: reset apic_delivered global variable on machine reset
This patch adds call to apic_reset_irq_delivered when the virtual
machine is reset.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <20170131114054.276.62201.stgit@PASHA-ISP>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Anton Nefedov
b0a335e351 qemu-char: socket backend: disconnect on write error
Socket backend read handler should normally perform a disconnect, however
the read handler may not get a chance to run if the frontend is not ready
(qemu_chr_be_can_write() == 0).

This means that in virtio-serial frontend case if
 - the host has disconnected (giving EPIPE on socket write)
 - and the guest has disconnected (-> frontend not ready -> backend
   will not read)
 - and there is still data (frontend->backend) to flush (has to be a really
   tricky timing but nevertheless, we have observed the case in production)

This results in virtio-serial trying to flush this data continiously forming
a busy loop.

Solution: react on write error in the socket write handler.
errno is not reliable after qio_channel_writev_full(), so we may not get
the exact EPIPE, so disconnect on any error but QIO_CHANNEL_ERR_BLOCK which
io_channel_send_full() converts to errno EAGAIN.
We must not disconnect right away though, there still may be data to read
(see 4bf1cb0).

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Daniel P. Berrange <berrange@redhat.com>
CC: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1486045589-8074-1-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Paolo Bonzini
a3fd46152e test-vmstate: remove yield_until_fd_readable
The function is not needed anymore now that migration is built on
top of QIOChannel.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Xu
b7a4104b73 kvm/ioapic: correct kvm ioapic version
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1486106298-3699-4-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Xu
8d5516be12 ioapic: fix error report value of def version
It should be 0x20, rather than 0x11.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1486106298-3699-3-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Xu
c6fcb0e201 kvm/ioapic: dump real object instead of a fake one
When we do "info ioapic" for kvm ioapic, we were building up a temporary
ioapic object. Let's fetch the real one and update correspond to the
real object as well.

This fixes printing uninitialized version field in
ioapic_print_redtbl().

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1486106298-3699-2-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-16 14:06:55 +01:00
Peter Maydell
ca5266de6c Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Wed 15 Feb 2017 03:46:59 GMT
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  net: e1000e: fix an infinite loop issue
  net: imx: limit buffer descriptor count
  colo-compare: sort TCP packet queue by sequence number
  net: e1000e: fix dead code in e1000e_write_packet_to_guest
  net: Mark 'vlan' parameter as deprecated

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-16 12:36:24 +00:00
Li Qiang
4154c7e03f net: e1000e: fix an infinite loop issue
This issue is like the issue in e1000 network card addressed in
this commit:
e1000: eliminate infinite loops on out-of-bounds transfer start.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
Prasad J Pandit
81f17e0d43 net: imx: limit buffer descriptor count
i.MX Fast Ethernet Controller uses buffer descriptors to manage
data flow to/fro receive & transmit queues. While transmitting
packets, it could continue to read buffer descriptors if a buffer
descriptor has length of zero and has crafted values in bd.flags.
Set an upper limit to number of buffer descriptors.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
Zhang Chen
a935cc3132 colo-compare: sort TCP packet queue by sequence number
Improve efficiency of TCP packet comparison.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
Paolo Bonzini
e514fc7e12 net: e1000e: fix dead code in e1000e_write_packet_to_guest
Because is_first is declared inside a loop, it is always true.  The store
is dead, and so is the "else" branch of "if (is_first)".  is_last is
okay though.

Reported by Coverity.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
Thomas Huth
a2dbe1356f net: Mark 'vlan' parameter as deprecated
The 'vlan' parameter is a continuous source of confusion for the users,
many people mix it up with the more common term VLAN (the link layer
packet encapsulation), and even if they realize that the QEMU 'vlan' is
rather some kind of network hub emulation, there is still a high risk
that they configure their QEMU networking in a wrong way with this
parameter (e.g. by hooking NICs together, so they get a 'loopback'
between one and the other NIC).
Thus at one point in time, we should finally get rid of the 'vlan'
feature in QEMU. Let's do a first step in this direction by declaring
the 'vlan' parameter as deprecated and informing the users to use the
'netdev' parameter instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-02-15 11:18:57 +08:00
Laurent Vivier
a1488b8661 linux-user: manage two new IFLA host message types
Add QEMU_IFLA_GSO_MAX_SEGS and QEMU_IFLA_GSO_MAX_SIZE
in host_to_target_data_link_rtattr().

These two messages are sent by the host kernel when
we use "sudo".

Found with qemu-m68k and Debian etch-m68k (sudo 1.6.8p12-4) and
host kernel 4.7.6-200.fc24.x86_64

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <1477530049-15676-1-git-send-email-laurent@vivier.eu>
2017-02-14 18:08:11 +01:00
Lena Djokic
2640077527 linux-user: Fix mq_open
If fourth argument is NULL it should be passed without
using lock_user function which would, in that case, return
EFAULT, and system call supports passing NULL as fourth argument.

Signed-off-by: Lena Djokic <Lena.Djokic@rt-rk.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Lena Djokic
77c6850fd7 linux-user: Fix readahead
Calculation of 64-bit offset was not correct for all cases.

Signed-off-by: Lena Djokic <Lena.Djokic@rt-rk.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Lena Djokic
fea243e90a linux-user: Fix inotify_init1 support
This commit adds necessary conversion of argument passed to inotify_init1.
inotify_init1 flags can be IN_NONBLOCK and IN_CLOEXEC which rely on O_NONBLOCK
and O_CLOEXEC and those can have different values on different platforms.

Signed-off-by: Lena Djokic <Lena.Djokic@rt-rk.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Richard Henderson
6cde51769e linux-user: Fix s390x safe-syscall for z900
The LT instruction was added in the extended immediate facility
introduced with the z9-109 processor.

Cc: Riku Voipio <riku.voipio@iki.fi>
Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Fixes: c9bc3437a9
Suggested-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Riku Voipio
5fbf66e6a1 linux-user: drop __cygwin__ ifdef
linux-user doesn't work on cygwin anyways.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Riku Voipio
b9a0be9239 linux-user: remove ifdef __USER_MISC
This preprocessor macro isn't set anywhere. Remove
the check so -strace can show these options.

Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2017-02-14 17:18:03 +01:00
Peter Maydell
5dae13cd71 Merge remote-tracking branch 'remotes/rth/tags/pull-or-20170214' into staging
Queued openrisc patches

# gpg: Signature made Mon 13 Feb 2017 21:21:03 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-or-20170214: (24 commits)
  target/openrisc: Optimize for r0 being zero
  target/openrisc: Tidy handling of delayed branches
  target/openrisc: Tidy ppc/npc implementation
  target/openrisc: Optimize l.jal to next
  target/openrisc: Fix madd
  target/openrisc: Implement muld, muldu, macu, msbu
  target/openrisc: Represent MACHI:MACLO as a single unit
  target/openrisc: Implement msync
  target/openrisc: Enable trap, csync, msync, psync for user mode
  target/openrisc: Set flags on helpers
  target/openrisc: Use movcond where appropriate
  target/openrisc: Keep SR_CY and SR_OV in a separate variables
  target/openrisc: Keep SR_F in a separate variable
  target/openrisc: Invert the decoding in dec_calc
  target/openrisc: Put SR[OVE] in TB flags
  target/openrisc: Streamline arithmetic and OVE
  target/openrisc: Rationalize immediate extraction
  target/openrisc: Tidy insn dumping
  target/openrisc: Implement lwa, swa
  target/openrisc: Fix exception handling status registers
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-14 09:55:48 +00:00
Richard Henderson
6597c28d61 target/openrisc: Optimize for r0 being zero
The HW does not special-case r0, but the ABI specifies that r0 should
contain 0.  If we expose this fact to the optimizer, we can simplify
a lot of the generated code.  We must of course verify that r0==0, but
that is trivial to do with a TB flag.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
a01deb36a6 target/openrisc: Tidy handling of delayed branches
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
24c328521b target/openrisc: Tidy ppc/npc implementation
The NPC SPR is really only supposed to be used for FPGA debugging.
It contains the same contents as PC, unless one plays games.  Follow
the or1ksim implementation in flushing delayed branch state when it
is changed.

The PPC SPR need not be updated every instruction, merely when we
exit the TB or attempt to read its contents.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
a8000cb480 target/openrisc: Optimize l.jal to next
This allows the tcg optimizer to see, and fold, all of the
constants involved in a GOT base register load sequence.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
762e22edcd target/openrisc: Fix madd
Note that the specification for lf.madd.s is confused.  It's
the only mention of supposed FPMADDHI/FPMADDLO special registers.
On the other hand, or1ksim implements a somewhat normal non-fused
multiply and add.  Mirror that.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
cc5de49ebe target/openrisc: Implement muld, muldu, macu, msbu
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
6f7332ba71 target/openrisc: Represent MACHI:MACLO as a single unit
Significantly simplifies the implementation of the use of MAC.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
24fc5c0feb target/openrisc: Implement msync
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
20dc52a37c target/openrisc: Enable trap, csync, msync, psync for user mode
Not documented as disabled for user mode.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:15:00 +11:00
Richard Henderson
9fba702bd4 target/openrisc: Set flags on helpers
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
784696d119 target/openrisc: Use movcond where appropriate
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
9745807191 target/openrisc: Keep SR_CY and SR_OV in a separate variables
This significantly streamlines carry and overflow production.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
84775c43f3 target/openrisc: Keep SR_F in a separate variable
This avoids having to keep merging and extracting the flag from SR.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
cf2ae4428f target/openrisc: Invert the decoding in dec_calc
Decoding the opcodes in the right order reduces by 100+ lines.
Also, it happens to put the opcodes in the same order as Chapter 17.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
0c53d7342b target/openrisc: Put SR[OVE] in TB flags
Removes a call at execution time for overflow exceptions.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
9ecaa27e71 target/openrisc: Streamline arithmetic and OVE
Fix incorrect overflow calculation.  Move overflow exception check
to a helper function, to eliminate inline branches.  Remove some
incorrect special casing of R0.  Implement multiply inline.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
6da544a6c4 target/openrisc: Rationalize immediate extraction
The architecture manual is consistent in using "I" for signed
fields and "K" for unsigned fields.  Mirror that.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
111ece5133 target/openrisc: Tidy insn dumping
Avoids warnings from unused variables etc.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
930c3d0074 target/openrisc: Implement lwa, swa
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Stafford Horne
c56e3b8670 target/openrisc: Fix exception handling status registers
I am working on testing instruction emulation patches for the linux
kernel. During testing I found these 2 issues:

 - sets DSX (delay slot exception) but never clears it
 - EEAR for illegal insns should point to the bad exception (as per
   openrisc spec) but its not

This patch fixes these two issues by clearing the DSX flag when not in a
delay slot and by setting EEAR to exception PC when handling illegal
instruction exceptions.

After this patch the openrisc kernel with latest patches boots great on
qemu and instruction emulation works.

Cc: qemu-trivial@nongnu.org
Cc: openrisc@lists.librecores.org
Signed-off-by: Stafford Horne <shorne@gmail.com>
Message-Id: <20170113220028.29687-1-shorne@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
c40413a65e linux-user: Honor CLONE_SETTLS for openrisc
Threads work much better when you set the TLS register.
This was fixed in the upstream kernel for Linux 4.9.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:59 +11:00
Richard Henderson
a0adc417a0 linux-user: Fix openrisc cpu_loop
We need to handle EXCP_DEBUG and EXCP_INTERRUPT.
We need to send signals to the guest using queue_signal.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:58 +11:00
Richard Henderson
ab90233855 linux-user: Add MMAP_SHIFT for openrisc
The page size on openrisc is 8k.  Sync the shift
required for the mmap2 syscall.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:58 +11:00
Richard Henderson
4a09d0bb34 target/openrisc: Rename the cpu from or32 to or1k
This is in keeping with the toolchain and or1ksim.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-14 08:14:58 +11:00
Peter Maydell
ec7a9bd5bb Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170213a' into staging
Migration

  Amit: migration: remove myself as maintainer
        MAINTAINERS: update my email address
  Ashijeet: migrate: Introduce zero RAM checks to skip RAM migration
  Pavel: Postcopy release RAM
  Halil: consolidate VMStateField.start
  Hailiang: COLO: fix setting checkpoint-delay not working properly
         COLO: Shutdown related socket fd while do failover
         COLO: Don't process failover request while loading VM's state
  Me:
     migration: Add VMSTATE_UNUSED_VARRAY_UINT32
     migration: Add VMSTATE_WITH_TMP
     tests/migration: Add test for VMSTATE_WITH_TMP
     virtio-net VMState conversion and new VMSTATE macros

# gpg: Signature made Mon 13 Feb 2017 17:36:39 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20170213a:
  virtio/migration: Migrate virtio-net to VMState
  tests/migration: Add test for VMSTATE_WITH_TMP
  migration: Add VMSTATE_WITH_TMP
  migration: Add VMSTATE_UNUSED_VARRAY_UINT32
  COLO: Don't process failover request while loading VM's state
  COLO: Shutdown related socket fd while do failover
  COLO: fix setting checkpoint-delay not working properly
  migration: consolidate VMStateField.start
  migrate: Introduce zero RAM checks to skip RAM migration
  migration: discard non-dirty ram pages after the start of postcopy
  add 'release-ram' migrate capability
  migration: add MigrationState arg for ram_save_/compressed_/page()
  MAINTAINERS: update my email address
  migration: remove myself as maintainer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 18:49:26 +00:00
Dr. David Alan Gilbert
982b78c5e3 virtio/migration: Migrate virtio-net to VMState
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20170203160651.19917-5-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  Merge fix against Halil's removal of the '_start' field in
    VMSTATE_VBUFFER_MULTIPLY
2017-02-13 17:27:14 +00:00
Dr. David Alan Gilbert
5c379d9031 tests/migration: Add test for VMSTATE_WITH_TMP
Add a test for VMSTATE_WITH_TMP to tests/test-vmstate.c

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170203160651.19917-4-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:14 +00:00
Dr. David Alan Gilbert
bcf4513129 migration: Add VMSTATE_WITH_TMP
VMSTATE_WITH_TMP is for handling structures where some calculation
or rearrangement of the data needs to be performed before the data
hits the wire.
For example,  where the value on the wire is an offset from a
non-migrated base, but the data in the structure is the actual pointer.

To use it, a temporary type is created and a vmsd used on that type.
The first element of the type must be 'parent' a pointer back to the
type of the main structure.  VMSTATE_WITH_TMP takes care of allocating
and freeing the temporary before running the child vmsd.

The post_load/pre_save on the child vmsd can copy things from the parent
to the temporary using the parent pointer and do any other calculations
needed; it can then use normal VMSD entries to do the actual data
storage without having to fiddle around with qemu_get_*/qemu_put_*

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170203160651.19917-3-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:14 +00:00
Dr. David Alan Gilbert
b5b5c56957 migration: Add VMSTATE_UNUSED_VARRAY_UINT32
VMSTATE_UNUSED_VARRAY_UINT32 is used to skip a chunk of the stream
that's an n-element array;  note the array size and the dynamic value
read never get multiplied so there's no overflow risk.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170203160651.19917-2-dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:14 +00:00
zhanghailiang
a8664ba510 COLO: Don't process failover request while loading VM's state
We should not do failover work while the main thread is loading
VM's state. Otherwise the consistent of VM's memory and
device state will be broken.

We will restart the loading process after jump over the stage,
The new failover status 'RELAUNCH' will help to record if we
need to restart the process.

Cc: Eric Blake <eblake@redhat.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1484657864-21708-4-git-send-email-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Added a missing '(Since 2.9)'
2017-02-13 17:27:13 +00:00
zhanghailiang
c937b9a6db COLO: Shutdown related socket fd while do failover
If the net connection between primary host and secondary host breaks
while COLO/COLO incoming threads are doing read() or write().
It will block until connection is timeout, and the failover process
will be blocked because of it.

So it is necessary to shutdown all the socket fds used by COLO
to avoid this situation. Besides, we should close the corresponding
file descriptors after failvoer BH shutdown them,
Or there will be an error.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1484657864-21708-3-git-send-email-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
zhanghailiang
479125d53e COLO: fix setting checkpoint-delay not working properly
If we set checkpoint-delay through command 'migrate-set-parameters',
It will not take effect until we finish last sleep chekpoint-delay,
That's will be offensive espeically when we want to change its value
from an extreme big one to a proper value.

Fix it by using timer to realize checkpoint-delay.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Message-Id: <1484657864-21708-2-git-send-email-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Halil Pasic
59046ec29a migration: consolidate VMStateField.start
The member VMStateField.start is used for two things, partial data
migration for VBUFFER data (basically provide migration for a
sub-buffer) and for locating next in QTAILQ.

The implementation of the VBUFFER feature is broken when VMSTATE_ALLOC
is used. This however goes unnoticed because actually partial migration
for VBUFFER is not used at all.

Let's consolidate the usage of VMStateField.start by removing support
for partial migration for VBUFFER.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>

Message-Id: <20170203175217.45562-1-pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Ashijeet Acharya
0827b9e97d migrate: Introduce zero RAM checks to skip RAM migration
Migration of a "none" machine with no RAM crashes abruptly as
bitmap_new() fails and thus aborts. Instead place zero RAM checks at
appropriate places to skip migration of RAM in this case and complete
migration successfully for devices only.

Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
Message-Id: <1486564125-31366-1-git-send-email-ashijeetacharya@gmail.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Pavel Butsykin
ced1c6166e migration: discard non-dirty ram pages after the start of postcopy
After the start of postcopy migration there are some non-dirty pages which have
already been migrated. These pages are no longer needed on the source vm so that
we can free them and it doen't hurt to complete the migration.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Message-Id: <20170203152321.19739-4-pbutsykin@virtuozzo.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Pavel Butsykin
53f09a1076 add 'release-ram' migrate capability
This feature frees the migrated memory on the source during postcopy-ram
migration. In the second step of postcopy-ram migration when the source vm
is put on pause we can free unnecessary memory. It will allow, in particular,
to start relaxing the memory stress on the source host in a load-balancing
scenario.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Message-Id: <20170203152321.19739-3-pbutsykin@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Manually merged in Pavel's 'migration: madvise error_report fixup!'
2017-02-13 17:27:13 +00:00
Pavel Butsykin
9eb1476610 migration: add MigrationState arg for ram_save_/compressed_/page()
Cosmetic patch. The use of ms variable instead of migrate_get_current()
looks nicer, especially when there reuse.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Message-Id: <20170203152321.19739-2-pbutsykin@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Amit Shah
cee887d969 MAINTAINERS: update my email address
I'm leaving my job at Red Hat, this email address will stop working next week.
Update it to one that I will have access to later.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1486120433-11628-1-git-send-email-amit.shah@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Amit Shah
c77a6c8dd7 migration: remove myself as maintainer
I'm switching jobs, and I'm not sure I can continue maintaining migration.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1486120416-11566-1-git-send-email-amit.shah@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-13 17:27:13 +00:00
Peter Maydell
305e6c8a2f Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Mon 13 Feb 2017 16:29:26 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  Makefile: Make "install" depend on "trace-events-all"
  docs: update manpage for stderr->log rename

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 16:44:04 +00:00
Fam Zheng
6eab3544f4 Makefile: Make "install" depend on "trace-events-all"
We install this file to data dir but since 0ab8ed18 it's no longer
required by any objects during "make". List it explicitly as a depended
target of install and fix the broken "make install" command.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170204143245.15974-1-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-13 13:38:31 +00:00
Philipp Gesang
20f8a1392f docs: update manpage for stderr->log rename
With commit ed7f5f1d8d the name of
this backend changed from “stderr” to “log”.

Signed-off-by: Philipp Gesang <philipp.gesang@intra2net.com>
Message-id: 20170202114101.2655-1-philipp.gesang@intra2net.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-13 13:38:31 +00:00
Peter Maydell
df96bfab49 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20170213-1' into staging
vga: bugfixes for cirrus and virtio-gpu

# gpg: Signature made Mon 13 Feb 2017 08:14:47 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vga-20170213-1:
  Revert "cirrus: allow zero source pitch in pattern fill rops"
  cirrus: fix patterncopy checks
  cirrus: replace debug printf with trace points
  vga: replace debug printf with trace points
  virtio-gpu: fix resource leak in virgl_cmd_resource_unref
  virtio-gpu: fix memory leak in set scanout

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 10:54:49 +00:00
Peter Maydell
0b4384d0bb Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2017-02-12' into staging
Block patches

# gpg: Signature made Sun 12 Feb 2017 01:26:20 GMT
# gpg:                using RSA key 0xF407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2017-02-12: (21 commits)
  qemu-img: Avoid setting ret to unused value in img_convert()
  qemu-img: Use qemu_strtoul() rather than raw strtoul()
  qemu-io: don't allow I/O operations larger than BDRV_REQUEST_MAX_BYTES
  qcow2: Optimize the refcount-block overlap check
  qemu-io: Add failure regression tests
  qemu-iotests: Add _unsupported_fmt helper
  qemu-io: Return non-zero exit code on failure
  block/nfs: fix naming of runtime opts
  block/nfs: fix NULL pointer dereference in URI parsing
  block: bdrv_invalidate_cache: invalidate children first
  block/qapi: reduce the execution time of qmp_query_blockstats
  block/qapi: reduce the coupling between the bdrv_query_stats and bdrv_query_bds_stats
  qemu-iotest: test to lookup protocol-based image with relative backing
  qemu-iotests: Don't create fifos / pidfiles with protocol paths
  block: check full backing filename when searching protocol filenames
  block/vmdk: Fix the endian problem of buf_len and lba
  iotests: record separate timings per format,protocol pair
  iotests: Fix reference output for 059
  qapi: Tweak error message of bdrv_query_image_info
  qemu-img: Improve commit invalid base message
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 10:16:23 +00:00
Peter Maydell
ed3d90df7c Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170210.0' into staging
VFIO updates 2017-02-10

 - Fix GTT wrap-around for Skylake IGD assignment (Alex Williamson)
 - Tag vfio-pci-igd-lpc-bridge as bridge device category (Thomas Huth)
 - Don't build calxeda-xgmac or amd-xgbe except on ARM (Thomas Huth)

# gpg: Signature made Fri 10 Feb 2017 21:34:33 GMT
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-updates-20170210.0:
  hw/vfio: Add CONFIG switches for calxeda-xgmac and amd-xgbe
  hw/vfio/pci-quirks: Set category of the "vfio-pci-igd-lpc-bridge" device
  vfio-pci: Fix GTT wrap-around for Skylake+ IGD

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-13 09:30:15 +00:00
Peter Maydell
10d6eda192 qemu-img: Avoid setting ret to unused value in img_convert()
Coverity points out that we assign the return value from
bdrv_snapshot_load_tmp() to 'ret' in img_convert(), but then
never use that variable. (We check for failure by looking
at local_err instead.) Drop the unused assignment, bringing
the call into line with the following call to
bdrv_snapshot_laod_tmp_by_id_or_name().

(Fixes CID 1247240.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486744104-15590-3-git-send-email-peter.maydell@linaro.org
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:56:32 +01:00
Peter Maydell
8b3c679228 qemu-img: Use qemu_strtoul() rather than raw strtoul()
Some of the argument parsing in qemu-img uses strtoul() to parse
integer arguments.  This is tricky to get correct and in fact the
code does not get it right, because it assigns the result of
strtoul() to an 'int' variable and then tries to check for > INT_MAX.
Coverity correctly complains that the comparison is always false.

Rewrite to use qemu_strtoul(), which has a saner convention for
reporting conversion failures.

(Fixes CID 1356421, CID 1356422, CID 1356423.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486744104-15590-2-git-send-email-peter.maydell@linaro.org
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:53:31 +01:00
Alberto Garcia
3026c4688c qemu-io: don't allow I/O operations larger than BDRV_REQUEST_MAX_BYTES
Passing a request size larger than BDRV_REQUEST_MAX_BYTES to any of the
I/O commands results in an error. While 'read' and 'write' handle the
error correctly, 'aio_read' and 'aio_write' hit an assertion:

blk_aio_read_entry: Assertion `rwco->qiov->size == acb->bytes' failed.

The reason is that the QEMU I/O code cannot handle request sizes
larger than BDRV_REQUEST_MAX_BYTES, so this patch makes qemu-io check
that all values are within range.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 79f66648c685929a144396bda24d13a207131dcf.1485878688.git.berto@igalia.com
[mreitz: Use BDRV_REQUEST_MAX_BYTES instead of INT_MAX]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:43 +01:00
Alberto Garcia
7061a07898 qcow2: Optimize the refcount-block overlap check
The metadata overlap checks introduced in a40f1c2add help detect
corruption in the qcow2 image by verifying that data writes don't
overlap with existing metadata sections.

The 'refcount-block' check in particular iterates over the refcount
table in order to get the addresses of all refcount blocks and check
that none of them overlap with the region where we want to write.

The problem with the refcount table is that since it always occupies
complete clusters its size is usually very big. With the default
values of cluster_size=64KB and refcount_bits=16 this table holds 8192
entries, each one of them enough to map 2GB worth of host clusters.

So unless we're using images with several TB of allocated data this
table is going to be mostly empty, and iterating over it is a waste of
CPU. If the storage backend is fast enough this can have an effect on
I/O performance.

This patch keeps the index of the last used (i.e. non-zero) entry in
the refcount table and updates it every time the table changes. The
refcount-block overlap check then uses that index instead of reading
the whole table.

In my tests with a 4GB qcow2 file stored in RAM this doubles the
amount of write IOPS.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 20170201123828.4815-1-berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:43 +01:00
Nir Soffer
bf68bcb18e qemu-io: Add failure regression tests
Add regression tests checking that qemu-io fails with non-zero exit code
when reading non-existing file or using the wrong image format.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Message-id: 20170201003120.23378-4-nirsof@gmail.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Nir Soffer
b4a2caa4bd qemu-iotests: Add _unsupported_fmt helper
This helper allows adding tests supporting any format expect the
specified formats. This may be useful to test that many formats behave
in a common way.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Message-id: 20170201003120.23378-3-nirsof@gmail.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Nir Soffer
b7aa131519 qemu-io: Return non-zero exit code on failure
The result of openfile was not checked, leading to failure deep in the
actual command with confusing error message, and exiting with exit code 0.

Here is a simple example - trying to read with the wrong format:

    $ touch file
    $ qemu-io -f qcow2 -c 'read -P 1 0 1024' file; echo $?
    can't open device file: Image is not in qcow2 format
    no file open, try 'help open'
    0

With this patch, we fail earlier with exit code 1:

    $ ./qemu-io -f qcow2 -c 'read -P 1 0 1024' file; echo $?
    can't open device file: Image is not in qcow2 format
    1

Failing earlier, we don't log this error now:

    no file open, try 'help open'

But some tests expected it; the line was removed from the test output.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170201003120.23378-2-nirsof@gmail.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Peter Lieven
f67409a5bb block/nfs: fix naming of runtime opts
commit 94d6a7a accidentally left the naming of runtime opts and QAPI
scheme inconsistent. As one consequence passing of parameters in the
URI is broken. Sync the naming of the runtime opts to the QAPI
scheme.

Please note that this is technically backwards incompatible with the 2.8
release, but the 2.8 release is the only version that had the wrong naming.
Furthermore release 2.8 suffered from a NULL pointer dereference during
URI parsing.

Fixes: 94d6a7a76e
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-id: 1485942829-10756-3-git-send-email-pl@kamp.de
[mreitz: Fixed commit message]
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Peter Lieven
8d20abe87a block/nfs: fix NULL pointer dereference in URI parsing
parse_uint_full wants to put the parsed value into the
variable passed via its second argument which is NULL.

Fixes: 94d6a7a76e
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1485942829-10756-2-git-send-email-pl@kamp.de
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Vladimir Sementsov-Ogievskiy
16e977d506 block: bdrv_invalidate_cache: invalidate children first
Current implementation invalidates firstly parent bds and then its
children. This leads to the following bug:

after incoming migration, in bdrv_invalidate_cache_all:
1. invalidate parent bds - reopen it with BDRV_O_INACTIVE cleared
2. child is not yet invalidated
3. parent check that its BDRV_O_INACTIVE is cleared
4. parent writes to child
5. assert in bdrv_co_pwritev, as BDRV_O_INACTIVE is set for child

This patch fixes it by just changing invalidate sequence: invalidate
children first.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170131112308.54189-1-vsementsov@virtuozzo.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Dou Liyang
a6baa60807 block/qapi: reduce the execution time of qmp_query_blockstats
In order to reduce the execution time, this patch optimize
the qmp_query_blockstats():
Remove the next_query_bds function.
Remove the bdrv_query_stats function.
Remove some judgement sentence.

The original qmp_query_blockstats calls next_query_bds to get
the next objects in each loops. In the next_query_bds, it checks
the query_nodes and blk. It also call bdrv_query_stats to get
the stats, In the bdrv_query_stats, it checks blk and bs each
times. This waste more times, which may stall the main loop a
bit. And if the disk is too many and donot use the dataplane
feature, this may affect the performance in main loop thread.

This patch removes that two functions, and makes the structure
clearly.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Message-id: 1484467275-27919-3-git-send-email-douly.fnst@cn.fujitsu.com
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[mreitz: Removed duplicate info->value assignment]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Dou Liyang
20a6d768f5 block/qapi: reduce the coupling between the bdrv_query_stats and bdrv_query_bds_stats
The bdrv_query_stats and bdrv_query_bds_stats functions need to call
each other, that increases the coupling. it also makes the program
complicated and makes some unnecessary tests.

Remove the call from bdrv_query_bds_stats to bdrv_query_stats, just
take some recursion to make it clearly.

Avoid testing whether the blk is NULL during querying the bds stats.
It is unnecessary.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Message-id: 1484467275-27919-2-git-send-email-douly.fnst@cn.fujitsu.com
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Jeff Cody
256e3b6387 qemu-iotest: test to lookup protocol-based image with relative backing
This test uses NFS and block-stream to force a lookup of a backing
image that has a relative filename, but a full backing image name
with the protocol path intact.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: 1a7a3d6e6d8af36cd5b47ed6ea93b5a9ededf81b.1485392617.git.jcody@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Jeff Cody
846a1d118e qemu-iotests: Don't create fifos / pidfiles with protocol paths
Trying to create, use, and remove fifos and pidfiles on protocol paths
(e.g. nfs://localhost/scratch/qemu-nbd.pid) is obviously broken.

Use the local $TEST_DIR path before it is 'protocolized' for these
files.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: bb4a731a35bc4ac81fe3db17479dd686315317c7.1485392617.git.jcody@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Jeff Cody
418661e032 block: check full backing filename when searching protocol filenames
In bdrv_find_backing_image(), if we are searching an image for a backing
file that contains a protocol, we currently only compare unmodified
paths.

However, some management software will change the backing filename to be
a relative filename in a path.  QEMU is able to handle this fine,
because internally it will use path_combine to put together the full
protocol URI.

However, this can lead to an inability to match an image during a QAPI
command that needs to use bdrv_find_backing_image() to find the image,
when it is searched by the full URI.

When searching for a protocol filename, if the straight comparison
fails, this patch will also compare against the full backing filename to
see if that is a match.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Message-id: c2d025adca8a2b665189e6f4cf080f44126d0b6b.1485392617.git.jcody@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
QingFeng Hao
4545d4f4af block/vmdk: Fix the endian problem of buf_len and lba
The problem was triggered by qemu-iotests case 055. It failed when it
was comparing the compressed vmdk image with original test.img.

The cause is that buf_len in vmdk_write_extent wasn't converted to
little-endian before it was stored to disk. But later vmdk_read_extent
read it and converted it from little-endian to cpu endian.
If the cpu is big-endian like s390, the problem will happen and
the data length read by vmdk_read_extent will become invalid!
The fix is to add the conversion in vmdk_write_extent, meanwhile,
repair the endianness problem of lba field which shall also be converted
to little-endian before storing to disk.

Cc: qemu-stable@nongnu.org
Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Signed-off-by: Jing Liu <liujbjl@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20161216052040.53067-2-haoqf@linux.vnet.ibm.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Daniel P. Berrange
36bd422812 iotests: record separate timings per format,protocol pair
The 'check' program records timings for each test that
is run. These timings are only valid, however, for a
particular format/protocol combination. So if frequently
running 'check' with a variety of different formats or
protocols, the times printed can be very misleading.

Instead of having a single 'check.time' file, maintain
multiple 'check.time-$IMGPROTO-$IMGFMT' files.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170103160556.9895-1-berrange@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:42 +01:00
Fam Zheng
53b63460f6 iotests: Fix reference output for 059
It was broken by efaa7c4eeb when it dropped the device name "image"
from BB API.  Now this error message text is updated again, sync it up.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170119130759.28319-3-famz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
Fam Zheng
9adceb0213 qapi: Tweak error message of bdrv_query_image_info
@bs doesn't always have a device name, such as when it comes from
"qemu-img info". Report file name instead.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170119130759.28319-2-famz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
Max Reitz
6b33f3ae8b qemu-img: Improve commit invalid base message
When trying to invoke qemu-img commit with a base image file name that
is not part of the top image's backing chain, the user receives a rather
plain "Base not found" error message. This is not really helpful because
it does not explain what "not found" means, potentially leaving the user
wondering why qemu cannot find a file despite it clearly existing in the
file system.

Improve the error message by clarifying that "not found" means "not
found in the top image's backing chain".

Reported-by: Ala Hino <ahino@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20161201020508.24417-1-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
QingFeng Hao
b135233b0d iotests: Fix a problem in common.filter
If TEST_DIR is set to /tmp, test case 144 will fail. The reason is that
TEST_DIR resembles 144's test image name tmp.qcow2.
When 144 is testing $TEST_DIR/tmp.qcow2, it wants to replace
$TEST_DIR/tmp.qcow2 to TEST_DIR/tmp.qcow2, but actually it will fail
and get TEST_DIRTEST_DIR.qcow2 in this case.
The fix is just to modify the code to replace $TEST_DIR/ with TEST_DIR/.

Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Message-id: 20161216054723.96055-2-haoqf@linux.vnet.ibm.com
Reviewed-by: Eric Blake <eblake@redhat.com>
[mreitz: Fixed commit message and dropped superfluous escaping]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-02-12 00:47:41 +01:00
Thomas Huth
e197de50c6 hw/vfio: Add CONFIG switches for calxeda-xgmac and amd-xgbe
Both devices seem to be specific to the ARM platform. It's confusing
for the users if they show up on other target architectures, too
(e.g. when the user runs QEMU with "-device ?" to get a list of
supported devices). Thus let's introduce proper configuration switches
so that the devices are only compiled and included when they are
really required.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-10 13:12:03 -07:00
Thomas Huth
f23363ea44 hw/vfio/pci-quirks: Set category of the "vfio-pci-igd-lpc-bridge" device
The device has "bridge" in its name, so it should obviously be in
the category DEVICE_CATEGORY_BRIDGE.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-10 13:12:03 -07:00
Alex Williamson
ac2a9862b7 vfio-pci: Fix GTT wrap-around for Skylake+ IGD
Previous IGD, up through Broadwell, only seem to write GTT values into
the first 1MB of space allocated for the BDSM, but clearly the GTT
can be multiple MB in size.  Our test in vfio_igd_quirk_data_write()
correctly filters out indexes beyond 1MB, but given the 1MB mask we're
using, we re-apply writes only to the first 1MB of the guest allocated
BDSM.

We can't assume either the host or guest BDSM is naturally aligned, so
we can't simply apply a different mask.  Instead, save the host BDSM
and do the arithmetic to subtract the host value to get the BDSM
offset and add it to the guest allocated BDSM.

Reported-by: Alexander Indenbaum <alexander.indenbaum@gmail.com>
Tested-by: Alexander Indenbaum <alexander.indenbaum@gmail.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-10 13:12:03 -07:00
Peter Maydell
6311b19b5c Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170210' into staging
target-arm queue:
 * aspeed: minor fixes
 * virt: declare fwcfg and virtio-mmio as DMA coherent in DT & ACPI
 * arm: enable basic TCG emulation of PMU for AArch64

# gpg: Signature made Fri 10 Feb 2017 18:06:30 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170210:
  aspeed/smc: use a modulo to check segment limits
  aspeed/smc: handle dummies only in fast read mode
  aspeed: remove useless comment on controller segment size
  aspeed: check for negative values returned by blk_getlength()
  hw/arm/virt: Declare fwcfg as dma cache coherent in dt
  hw/arm/virt: Declare fwcfg as dma cache coherent in ACPI
  hw/arm/virt: Declare virtio-mmio as dma cache coherent in ACPI
  target-arm: Declare virtio-mmio as dma-coherent in dt
  target-arm: Enable vPMU support under TCG mode
  target-arm: Add support for PMU register PMINTENSET_EL1
  target-arm: Add support for AArch64 PMU register PMXEVTYPER_EL0
  target-arm: Add support for PMU register PMSELR_EL0

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 18:54:30 +00:00
Peter Maydell
98b2faeaee Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Fri 10 Feb 2017 16:47:54 GMT
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  ahci: advertise HOST_CAP_64

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 18:07:02 +00:00
Cédric Le Goater
b4cc583f02 aspeed/smc: use a modulo to check segment limits
The size of a segment is not necessarily a power of 2.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486648058-520-5-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:30 +00:00
Cédric Le Goater
1a6d4fc27d aspeed/smc: handle dummies only in fast read mode
HW works fine in normal read mode with dummy bytes being set. So let's
check this case to not transfer bytes.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1486648058-520-4-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Cédric Le Goater
93bf276d5f aspeed: remove useless comment on controller segment size
The flash devices used for the FMC controller (BMC firmware) are well
defined for each Aspeed machine and are all smaller than the default
mapping window size, at least for CE0 which is the chip the SoC boots
from.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486648058-520-3-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Cédric Le Goater
0c7209bee8 aspeed: check for negative values returned by blk_getlength()
write_boot_rom() does not check for negative values. This is more a
problem for coverity than the actual code as the size of the flash
device is checked when the m25p80 object is created. If there is
anything wrong with the backing file, we should not even reach that
path.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1486648058-520-2-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Alexander Graf
14efdb5cb3 hw/arm/virt: Declare fwcfg as dma cache coherent in dt
Fw-cfg recently learned how to directly access guest memory and does so in
cache coherent fashion. Tell the guest about that fact when it's using DT.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1486644810-33181-5-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Alexander Graf
3b5c492b1c hw/arm/virt: Declare fwcfg as dma cache coherent in ACPI
Fw-cfg recently learned how to directly access guest memory and does so in
cache coherent fashion. Tell the guest about that fact when it's using ACPI.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1486644810-33181-4-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Alexander Graf
76266d9913 hw/arm/virt: Declare virtio-mmio as dma cache coherent in ACPI
Virtio-mmio devices can directly access guest memory and do so in cache
coherent fashion. Tell the guest about that fact when it's using ACPI.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1486644810-33181-3-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Alexander Graf
054bb7b215 target-arm: Declare virtio-mmio as dma-coherent in dt
QEMU emulated hardware is always dma coherent with its guest. We do
annotate that correctly on the PCI host controller, but left out
virtio-mmio.

Recent kernels have started to interpret that flag rather than take
dma coherency as granted with virtio-mmio. While that is considered
a kernel bug, as it breaks previously working systems, it showed that
our dt description is incomplete.

This patch adds the respective marker that allows guest OSs to evaluate
that our virtio-mmio devices are indeed cache coherent.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Message-id: 1486644810-33181-2-git-send-email-agraf@suse.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:29 +00:00
Wei Huang
d6f02ce3b8 target-arm: Enable vPMU support under TCG mode
This patch contains several fixes to enable vPMU under TCG mode. It
first removes the checking of kvm_enabled() while unsetting
ARM_FEATURE_PMU. With it, the .pmu option can be used to turn on/off vPMU
under TCG mode. Secondly the PMU node of DT table is now created under TCG.
The last fix is to disable the masking of PMUver field of ID_AA64DFR0_EL1.

Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486504171-26807-5-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Wei Huang
e6ec54571e target-arm: Add support for PMU register PMINTENSET_EL1
This patch adds access support for PMINTENSET_EL1.

Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1486504171-26807-4-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Wei Huang
fdb8665672 target-arm: Add support for AArch64 PMU register PMXEVTYPER_EL0
In order to support Linux perf, which uses PMXEVTYPER register,
this patch adds read/write access support for PMXEVTYPER. The access
is CONSTRAINED UNPREDICTABLE when PMSELR is not 0x1f. Additionally
this patch adds support for PMXEVTYPER_EL0.

Signed-off-by: Wei Huang <wei@redhat.com>
Message-id: 1486504171-26807-3-git-send-email-wei@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Wei Huang
6b0407805d target-arm: Add support for PMU register PMSELR_EL0
This patch adds support for AArch64 register PMSELR_EL0. The existing
PMSELR definition is revised accordingly.

Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: Moved #ifndef CONFIG_USER_ONLY to cover new regdefs]
Message-id: 1486504171-26807-2-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 17:40:28 +00:00
Ladi Prosek
98cb5dccb1 ahci: advertise HOST_CAP_64
The AHCI emulation code supports 64-bit addressing and should advertise this
fact in the Host Capabilities register. Both Linux and Windows drivers test
this bit to decide if the upper 32 bits of various registers may be written
to, and at least some versions of Windows have a bug where DMA is attempted
with an address above 4GB but, in the absence of HOST_CAP_64, the upper 32
bits are left unititialized which leads to a memory corruption.

[Maintainer edit:

This fixes https://bugzilla.redhat.com/show_bug.cgi?id=1411105,
which affects Windows Server 2008 SP2 in some cases.]

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Message-id: 1484305370-6220-1-git-send-email-lprosek@redhat.com
[Amended commit message --js]
Signed-off-by: John Snow <jsnow@redhat.com>
2017-02-10 11:47:11 -05:00
Gerd Hoffmann
12e97ec399 Revert "cirrus: allow zero source pitch in pattern fill rops"
This reverts commit 5858dd1801.

Conflicts:
	hw/display/cirrus_vga.c

Cc: Wolfgang Bumiller <w.bumiller@proxmox.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 1486645341-5010-2-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
95280c31cd cirrus: fix patterncopy checks
The blit_region_is_unsafe checks don't work correctly for the
patterncopy source.  It's a fixed-sized region, which doesn't
depend on cirrus_blt_{width,height}.  So go do the check in
cirrus_bitblt_common_patterncopy instead, then tell blit_is_unsafe that
it doesn't need to verify the source.  Also handle the case where we
blit from cirrus_bitbuf correctly.

This patch replaces 5858dd1801.

Security impact:  I think for the most part error on the safe side this
time, refusing blits which should have been allowed.

Only exception is placing the blit source at the end of the video ram,
so cirrus_blt_srcaddr + 256 goes beyond the end of video memory.  But
even in that case I'm not fully sure this actually allows read access to
host memory.  To trick the commit 5858dd18 security checks one has to
pick very small cirrus_blt_{width,height} values, which in turn implies
only a fraction of the blit source will actually be used.

Cc: Wolfgang Bumiller <w.bumiller@proxmox.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 1486645341-5010-1-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
ec87f206d7 cirrus: replace debug printf with trace points
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486561893-26470-2-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
cf7dabeebc vga: replace debug printf with trace points
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1486561893-26470-1-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Gerd Hoffmann
5e8e3c4c75 virtio-gpu: fix resource leak in virgl_cmd_resource_unref
When the guest sends VIRTIO_GPU_CMD_RESOURCE_UNREF without detaching the
backing storage beforehand (VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING)
we'll leak memory.

This patch fixes it for 3d mode, simliar to the 2d mode fix in commit
"b8e2392 virtio-gpu: call cleanup mapping function in resource destroy".

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485167210-4757-1-git-send-email-kraxel@redhat.com
2017-02-10 16:49:45 +01:00
Li Qiang
dd248ed7e2 virtio-gpu: fix memory leak in set scanout
In virtio_gpu_set_scanout function, when creating the 'rect'
its refcount is set to 2, by pixman_image_create_bits and
qemu_create_displaysurface_pixman function. This can lead
a memory leak issues. This patch avoid this issue.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 5884626f.5b2f6b0a.1bfff.3037@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-10 16:49:45 +01:00
Thomas Huth
61eedf7aec tests/prom-env: Ease time-out problems on slow hosts
Peter Maydell recently ran into time-out problems with the
prom-env test on a rather slow ARM board. To tackle this issue,
we can speed up the test by running QEMU with "-nodefaults" for
the pseries machine, so that SLOF has less devices to scan during
boot, and by using the "nvramrc" environment variable instead of
"boot-command", since this variable is evaluated earlier in the
boot process.
And to be really sure that we do not face such time out problems
again, let's also increase the time out value from 100s to 120s
instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1486739699-1076-1-git-send-email-thuth@redhat.com
Tested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 15:44:53 +00:00
Peter Maydell
33d076ebd0 Merge remote-tracking branch 'remotes/stsquad/tags/pull-travis-10022017-1' into staging
One minor fix and a build split to reduce timeouts.

# gpg: Signature made Fri 10 Feb 2017 14:46:52 GMT
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-travis-10022017-1:
  .travis.yml: split VM based builds
  .travis.yml: don't specify CONFIG twice

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-10 15:05:37 +00:00
Alex Bennée
78a22af040 .travis.yml: split VM based builds
The Trusty based builds run a little slower than the main container
based ones. This is also true for the latest version of Clang. The
builds are getting very close (and occasionally run over) the 50 minute
timeout. Rather than partitioning by target I just split them into
linux-user and system builds.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-10 13:19:56 +00:00
Alex Bennée
fed5364971 .travis.yml: don't specify CONFIG twice
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-10 13:19:56 +00:00
Peter Maydell
8b1897725d Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170209-2' into staging
vnc: add support for multiple listening sockets.
vnc: misc fixes and cleanups.

# gpg: Signature made Thu 09 Feb 2017 16:45:02 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170209-2:
  ui: add ability to specify multiple VNC listen addresses
  util: add iterators for QemuOpts values
  ui: let VNC server listen on all resolved IP addresses
  ui: extract code to connect/listen from vnc_display_open
  ui: refactor code for populating SocketAddress from vnc_display_open
  ui: refactor VncDisplay to allow multiple listening sockets
  ui: fix reporting of VNC auth in query-vnc-servers
  ui: fix regression handling bare 'websocket' option to -vnc
  vnc: do not disconnect on EAGAIN
  ui/vnc: Drop unused vnc_has_job() and vnc_jobs_clear()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-09 16:58:39 +00:00
Daniel P. Berrange
396f935a9a ui: add ability to specify multiple VNC listen addresses
This change allows the listen address and websocket address
options for -vnc to be repeated. This causes the VNC server
to listen on multiple addresses. e.g.

 $ $QEMU -vnc vnc=localhost:1,vnc=unix:/tmp/vnc,\
              websocket=127.0.0.1:8080,websocket=[::]:8081

results in listening on

127.0.0.1:5901, 127.0.0.1:8080, ::1:5901, :::8081 & /tmp/vnc

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-9-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-09 17:28:49 +01:00
Daniel P. Berrange
e998e2090f util: add iterators for QemuOpts values
To iterate over all QemuOpts currently requires using a callback
function which is inconvenient for control flow. Add support for
using iterator functions more directly

  QemuOptsIter iter;
  QemuOpt *opt;

  qemu_opts_iter_init(&iter, opts, "repeated-key");
  while ((opt = qemu_opts_iter_next(&iter)) != NULL) {
      ....do something...
  }

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-8-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-09 17:28:49 +01:00
Daniel P. Berrange
57a6d6d538 ui: let VNC server listen on all resolved IP addresses
Remove the limitation that the VNC server can only listen on
a single resolved IP address. This uses the new DNS resolver
API to resolve a SocketAddress struct into an array of
SocketAddress structs containing raw IP addresses. The VNC
server will then attempt to listen on all resolved IP addresses.
The server must successfully listen on at least one of the
resolved IP addresses, otherwise an error will be reported.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-7-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-09 17:28:49 +01:00
Daniel P. Berrange
8bd22f477f ui: extract code to connect/listen from vnc_display_open
The code which takes a SocketAddress and connects/listens on the
network is going to get more complicated to deal with multiple
listeners. Pull it out into a separate method to avoid making the
vnc_display_open method even more complex.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-6-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-09 17:28:49 +01:00
Daniel P. Berrange
275e0d616b ui: refactor code for populating SocketAddress from vnc_display_open
The code which interprets the CLI args to populate the SocketAddress
objects for plain & websockets VNC is quite complex already and will
need further enhancements shortly. Refactor it into separate methods
to avoid vnc_display_open getting even larger. As a side effect of
the refactoring, it is now possible to specify a listen address for
the websocket server explicitly. e.g,

  -vnc localhost:5900,websockets=0.0.0.0:8080

will listen on localhost for the plain VNC server, but expose the
websockets VNC server on the public interface. This refactoring
also removes the restriction that prevents enabling websockets
when the plain VNC server is listening on a UNIX socket.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-5-berrange@redhat.com

[ kraxel: squashed clang build fix ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-09 17:28:45 +01:00
Daniel P. Berrange
4ee74fa708 ui: refactor VncDisplay to allow multiple listening sockets
Currently there is only a single listener for plain VNC and
a single listener for websockets VNC. This means that if
getaddrinfo() returns multiple IP addresses, for a hostname,
the VNC server can only listen on one of them. This is
just bearable if listening on wildcard interface, or if
the host only has a single network interface to listen on,
but if there are multiple NICs and the VNC server needs
to listen on 2 or more specific IP addresses, it can't be
done.

This refactors the VncDisplay state so that it holds an
array of listening sockets, but still only listens on
one socket.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-4-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-08 14:59:37 +01:00
Daniel P. Berrange
2a7e6857cd ui: fix reporting of VNC auth in query-vnc-servers
Currently the VNC authentication info is emitted at the
top level of the query-vnc-servers data. This is wrong
because the authentication scheme differs between plain
and websockets when TLS is enabled. We should instead
report auth against the individual servers. e.g.

(QEMU) query-vnc-servers
{
    "return": [
        {
            "clients": [],
            "id": "default",
            "auth": "vencrypt",
            "vencrypt": "x509-vnc",
            "server": [
                {
                    "host": "127.0.0.1"
                    "service": "5901",
                    "websocket": false,
                    "family": "ipv4",
                    "auth": "vencrypt",
                    "vencrypt": "x509-vnc"
                },
                {
                    "host": "127.0.0.1",
                    "service": "5902",
                    "websocket": true,
                    "family": "ipv4",
                    "auth": "vnc"
                }
            ]
        }
    ]
}

This also future proofs the QMP schema so that we can
cope with multiple VNC server instances, listening on
different interfaces or ports, with different auth
setup.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-3-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-08 14:59:37 +01:00
Daniel P. Berrange
1b1aeb5828 ui: fix regression handling bare 'websocket' option to -vnc
The -vnc argument is documented as accepting two syntaxes for
the 'websocket' option, either a bare option name, or a port
number. If using the bare option name, it is supposed to apply
the display number as an offset to base port 5700. e.g.

  -vnc localhost:3,websocket

should listen on port 5703, however, this was broken in 2.3.0 since

  commit 4db14629c3
  Author: Gerd Hoffmann <kraxel@redhat.com>
  Date:   Tue Sep 16 12:33:03 2014 +0200

    vnc: switch to QemuOpts, allow multiple servers

instead qemu tries to listen on port "on" which gets looked up in
/etc/services and fails.

Fixes bug: #1455912

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-2-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-08 14:59:37 +01:00
Michael Tokarev
537848ee62 vnc: do not disconnect on EAGAIN
When qemu vnc server is trying to send large update to clients,
there might be a situation when system responds with something
like EAGAIN, indicating that there's no system memory to send
that much data (depending on the network speed, client and server
and what is happening).  In this case, something like this happens
on qemu side (from strace):

sendmsg(16, {msg_name(0)=NULL,
        msg_iov(1)=[{"\244\"..., 729186}],
        msg_controllen=0, msg_flags=0}, 0) = 103950
sendmsg(16, {msg_name(0)=NULL,
        msg_iov(1)=[{"lz\346"..., 1559618}],
        msg_controllen=0, msg_flags=0}, 0) = -1 EAGAIN
sendmsg(-1, {msg_name(0)=NULL,
        msg_iov(1)=[{"lz\346"..., 1559618}],
        msg_controllen=0, msg_flags=0}, 0) = -1 EBADF

qemu closes the socket before the retry, and obviously it gets EBADF
when trying to send to -1.

This is because there WAS a special handling for EAGAIN, but now it doesn't
work anymore, after commit 04d2529da2, because
now in all error-like cases we initiate vnc disconnect.

This change were introduced in qemu 2.6, and caused numerous grief for many
people, resulting in their vnc clients reporting sporadic random disconnects
from vnc server.

Fix that by doing the disconnect only when necessary, i.e. omitting this
very case of EAGAIN.

Hopefully the existing condition (comparing with QIO_CHANNEL_ERR_BLOCK)
is sufficient, as the original code (before the above commit) were
checking for other errno values too.

Apparently there's another (semi?)bug exist somewhere here, since the
code tries to write to fd# -1, it probably should check if the connection
is open before. But this isn't important.

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1486115549-9398-1-git-send-email-mjt@msgid.tls.msk.ru
Fixes: 04d2529da2
Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-08 14:59:36 +01:00
Peter Maydell
c3ff04b60d ui/vnc: Drop unused vnc_has_job() and vnc_jobs_clear()
The functions vnc_has_job() and vnc_jobs_clear() are
never used; remove them.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Message-id: 1486146260-8092-1-git-send-email-peter.maydell@linaro.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-08 14:59:36 +01:00
Peter Maydell
f073cd3a2b Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170207-1' into staging
target-arm:
 * new "unimplemented" device for stubbing out devices in a
   system model so accesses can be logged
 * stellaris: document the SoC memory map
 * arm: create instruction syndromes for AArch32 data aborts
 * arm: Correctly handle watchpoints for BE32 CPUs
 * Fix Thumb-1 BE32 execution and disassembly
 * arm: Add cfgend parameter for ARM CPU selection
 * sd: sdhci: check data length during dma_memory_read
 * aspeed: add a watchdog controller
 * integratorcp: adding vmstate for save/restore

# gpg: Signature made Tue 07 Feb 2017 19:20:19 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170207-1:
  stellaris: Use the 'unimplemented' device for parts we don't implement
  hw/misc: New "unimplemented" sysbus device
  stellaris: Document memory map and which SoC devices are unimplemented
  target/arm: A32, T32: Create Instruction Syndromes for Data Aborts
  target/arm: Abstract out pbit/wbit tests in ARM ldr/str decode
  arm: Correctly handle watchpoints for BE32 CPUs
  Fix Thumb-1 BE32 execution and disassembly.
  target/arm: Add cfgend parameter for ARM CPU selection.
  hw/arm/integratorcp: Support specifying features via -cpu
  sd: sdhci: check data length during dma_memory_read
  aspeed: add a watchdog controller
  wdt: Add Aspeed watchdog device model
  integratorcp: adding vmstate for save/restore

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 19:21:30 +00:00
Peter Maydell
aecfbbc97a stellaris: Use the 'unimplemented' device for parts we don't implement
Use the 'unimplemented' dummy device to cover regions of the
SoC device memory map which we don't have proper device
implementations for yet.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1484247815-15279-4-git-send-email-peter.maydell@linaro.org
2017-02-07 18:55:15 +00:00
Peter Maydell
f5095aa380 hw/misc: New "unimplemented" sysbus device
Create a new "unimplemented" sysbus device, which simply accepts
all read and write accesses, and implements them as read-as-zero,
write-ignored, with logging of the access as LOG_UNIMP.

This is useful for stubbing out bits of an SoC or board model
which haven't been written yet.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1484247815-15279-3-git-send-email-peter.maydell@linaro.org
2017-02-07 18:55:15 +00:00
Peter Maydell
394c8bbfb7 stellaris: Document memory map and which SoC devices are unimplemented
Add a comment documenting the memory map of the SoC devices and which
are not implemented.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1484247815-15279-2-git-send-email-peter.maydell@linaro.org
2017-02-07 18:55:15 +00:00
Peter Maydell
9bb6558a21 target/arm: A32, T32: Create Instruction Syndromes for Data Aborts
Add support for generating the ISS (Instruction Specific Syndrome)
for Data Abort exceptions taken from AArch32. These syndromes are
used by hypervisors for example to trap and emulate memory accesses.

This is the equivalent for AArch32 guests of the work done for AArch64
guests in commit aaa1f954d4.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-02-07 18:30:00 +00:00
Peter Maydell
63f26fcfda target/arm: Abstract out pbit/wbit tests in ARM ldr/str decode
In the ARM ldr/str decode path, rather than directly testing
"insn & (1 << 21)" and "insn & (1 << 24)", abstract these
bits out into wbit and pbit local flags. (We will want to
do more tests against them to determine whether we need to
provide syndrome information.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2017-02-07 18:29:59 +00:00
Julian Brown
4061200059 arm: Correctly handle watchpoints for BE32 CPUs
In BE32 mode, sub-word size watchpoints can fail to trigger because the
address of the access is adjusted in the opcode helpers before being
compared with the watchpoint registers.  This patch reverses the address
adjustment before performing the comparison with the help of a new CPUClass
hook.

This version of the patch augments and tidies up comments a little.

Signed-off-by: Julian Brown <julian@codesourcery.com>
Message-id: caaf64ffc72f6ae183015337b7afdbd4b8989cb6.1484929304.git.julian@codesourcery.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Julian Brown
f7478a92dd Fix Thumb-1 BE32 execution and disassembly.
Thumb-1 code has some issues in BE32 mode (as currently implemented). In
short, since bytes are swapped within words at load time for BE32
executables, this also swaps pairs of adjacent Thumb-1 instructions.

This patch un-swaps those pairs of instructions again, both for execution,
and for disassembly. (The previous version of the patch always read four
bytes in arm_read_memory_func and then extracted the proper two bytes,
in a probably misguided attempt to match the behaviour of actual hardware
as described by e.g. the ARM9TDMI TRM, section 3.3 "Endian effects for
instruction fetches". It's less complicated to just read the correct
two bytes though.)

Signed-off-by: Julian Brown <julian@codesourcery.com>
Message-id: ca20462a044848000370318a8bd41dd0a4ed273f.1484929304.git.julian@codesourcery.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Julian Brown
3a062d5730 target/arm: Add cfgend parameter for ARM CPU selection.
Add a new "cfgend" property which selects whether the CPU resets into
big-endian mode or not.  This setting affects whether we reset with
SCTLR_B (ARMv6 and earlier) or SCTLR_EE (ARMv7 and later) set.

Signed-off-by: Julian Brown <julian@codesourcery.com>
Message-id: 11420d1c49636c1790e60578ee996e51f0f0b835.1484929304.git.julian@codesourcery.com
[PMM: use error_report_err() rather than error_report();
 move the integratorcp changes to their own patch;
 drop an unnecessary extra #include;
 rephrase commit message accordingly;
 move setting of reset_sctlr above registration of cpregs
 so it actually has an effect]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Julian Brown
00909b5858 hw/arm/integratorcp: Support specifying features via -cpu
Since the integratorcp board creates the CPU object directly
rather than via cpu_arm_init(), we have to call the CPU
class parse_features() method ourselves if we want to
support the user passing features via the -cpu command
line argument as well as just the cpu name. Do so.

Signed-off-by: Julian Brown <julian@codesourcery.com>
[PMM: split out into its own patch]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Prasad J Pandit
42922105be sd: sdhci: check data length during dma_memory_read
While doing multi block SDMA transfer in routine
'sdhci_sdma_transfer_multi_blocks', the 's->fifo_buffer' starting
index 'begin' and data length 's->data_count' could end up to be same.
This could lead to an OOB access issue. Correct transfer data length
to avoid it.

Cc: qemu-stable@nongnu.org
Reported-by: Jiang Xin <jiangxin1@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20170130064736.9236-1-ppandit@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Cédric Le Goater
013befe1ca aspeed: add a watchdog controller
This enables reboot of a guest from U-Boot and Linux.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Message-id: 1485452251-1593-3-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Cédric Le Goater
854123bf8d wdt: Add Aspeed watchdog device model
The Aspeed SoC includes a set of watchdog timers using 32-bit
decrement counters, which can be based either on the APB clock or
a 1 MHz clock.

The watchdog timer is designed to prevent system deadlock and, in
general, it should be restarted before timeout. When a timeout occurs,
different types of signals can be generated, ARM reset, SOC reset,
System reset, CPU Interrupt, external signal or boot from alternate
block. The current model only performs the system reset function as
this is used by U-Boot and Linux.

Signed-off-by: Joel Stanley <joel@jms.id.au>
Message-id: 1485452251-1593-2-git-send-email-clg@kaod.org
[clg: - fixed compile breakage
      - fixed io region size
      - added watchdog_perform_action() on timer expiry
      - wrote a commit log
      - merged fixes from Andrew Jeffery to scale the reload value ]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:59 +00:00
Pavel Dovgalyuk
26d3202207 integratorcp: adding vmstate for save/restore
VMState added by this patch preserves correct
loading of the integratorcp device state.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170131114310.6768.79416.stgit@PASHA-ISP
[PMM: removed unnecessary minimum_version_id_old lines]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 18:29:58 +00:00
Peter Maydell
d0dff238a8 Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170206' into staging
migration/next for 20170206

# gpg: Signature made Mon 06 Feb 2017 16:13:26 GMT
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170206:
  postcopy: Recover block devices on early failure
  Postcopy: Reset state to avoid cleanup assert
  vmstate registration: check return values
  migration: Check for ID length
  vmstate_register_with_alias_id: Take an Error **
  migration: create Migration Incoming State at init time

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 15:29:26 +00:00
Peter Maydell
3c457da147 Merge remote-tracking branch 'remotes/rth/tags/pull-hppa-20170206' into staging
Misc hppa fixes.

# gpg: Signature made Tue 07 Feb 2017 02:28:40 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-hppa-20170206:
  target/hppa: Fix gdb_write_register
  target/hppa: Tidy do_cbranch
  linux-user: define correct UTS machine name for hppa
  linux-user: fix "apt-get update" on linux-user hppa
  linux-user: add hppa magic numbers in qemu-binfmt-conf.sh

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 14:11:52 +00:00
Daniel P. Berrange
ac7568bd3f rules: don't try to create missing include dirs
In

  commit ba78db44f6
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Wed Jan 25 16:14:10 2017 +0000

  make: move top level dir to end of include search path

The dir $(BUILD_DIR)/$(@D) was added to the include
path. This would sometimes point to a non-existant
directory, if the sub-dir in question did not contain
any target-independant files (eg tcg/). To deal with
this the rules.mak attempted to create the directory.

While this was succesful, it also caused accidental
creation of files in the parent of the build dir.
e.g. when building common source files into target
specific output files.

Rather than trying to workaround this, just revert
the code that attempted to mkdir the missing include
directories. Instead just turn off the compiler warning
in question as the missing dir is expected & harmless
in general.

NB: you can clean up a build directory parent that has
been filled with empty directories by commit ba78db44f6
using this GNU find command in that parent directory:
  find audio backends block chardev crypto disas fsdev hw io linux-user \
    migration nbd net qapi qom replay slirp target ui util \
    -type d -empty -delete

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Tested-by: Alberto Garcia <berto@igalia.com>
[PMM: added note about how to clean up a polluted directory]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-07 11:52:34 +00:00
Richard Henderson
6836a8fb96 target/hppa: Fix gdb_write_register
Add a missing break, detected by Coverity.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-06 18:25:31 -08:00
Richard Henderson
a881c8e73f target/hppa: Tidy do_cbranch
Removes some dead code detected by Covarity.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-06 18:24:40 -08:00
Laurent Vivier
3d96995dec linux-user: define correct UTS machine name for hppa
the correct UTS machine name (as expected by systemd) is "parisc",
not "hppa".

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170126080449.28255-4-laurent@vivier.eu>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-06 18:24:40 -08:00
Laurent Vivier
40493c5f2b linux-user: fix "apt-get update" on linux-user hppa
apt-get was hanging on linux-user hppa.

strace has shown the netlink data stream was not correctly byte swapped.

It appears the fd translator function is unregistered just after it
has been registered, so the translator function is not called.

This patch removes the fd_trans_unregister() after the do_socket()
in the TARGET_NR_socket case.

This fd_trans_unregister() was added by commit
    e36800c linux-user: add signalfd/signalfd4 syscalls
when do_socket() was not registering any fd translator.
And as now it is, we must remove this fd_trans_unregister() to keep them.

Reported-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Tested-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Message-Id: <20170126080449.28255-3-laurent@vivier.eu>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-06 18:24:39 -08:00
Laurent Vivier
e4d966cc65 linux-user: add hppa magic numbers in qemu-binfmt-conf.sh
As we have now a linux-user HPPA target, we can add it to the list of
supported targets in qemu-binfmt-conf.sh

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170126080449.28255-2-laurent@vivier.eu>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-06 18:24:39 -08:00
Dr. David Alan Gilbert
ef8d6488d2 postcopy: Recover block devices on early failure
An early postcopy failure can be recovered from as long as we know
we haven't sent the command to run the destination.
We have to undo the bdrv_inactivate_all by calling
bdrv_invalidate_cache_all

Note that I'm not using ms->block_inactive because once we've
sent the postcopy package we dont want anything else to try
and recover the block storage on the source; the destination
might have started writing to it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170202155909.31784-3-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-02-06 13:36:49 +01:00
Dr. David Alan Gilbert
328d4d8528 Postcopy: Reset state to avoid cleanup assert
On a destination host with no userfault support an incoming
postcopy would cause the state to enter ADVISE before
it realised there was no support, and because it was in ADVISE
state it would perform a cleanup at the end.  Since there
was no support the cleanup function should be unreachable,
but ends up being called and asserting.

Reset the state when we realise we have no support, thus the
cleanup doesn't happen.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170202155909.31784-2-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-02-06 13:36:49 +01:00
Dr. David Alan Gilbert
67980031d2 vmstate registration: check return values
Check qdev's call to vmstate_register_with_alias_id; that gets
most of the common uses; there's hundreds of calls via vmstate_register
which could get fixed over time.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170202125956.21942-4-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-02-06 13:36:49 +01:00
Dr. David Alan Gilbert
581f08bac2 migration: Check for ID length
The qdev id of a device can be huge if it's on the end of a chain
of bridges; in reality such chains shouldn't occur but they can
be made to by chaining PCIe bridges together.

The migration format has a number of 256 character long format
limits; check we don't hit them (we already use pstrcat/cpy but
that just protects us from buffer overruns, we fairly quickly
hit an assert).

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170202125956.21942-3-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-02-06 13:36:49 +01:00
Dr. David Alan Gilbert
bc5c4f2196 vmstate_register_with_alias_id: Take an Error **
I'll be adding an error to it in a subsequent patch.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170202125956.21942-2-dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-02-06 13:36:49 +01:00
Juan Quintela
b4b076daf3 migration: create Migration Incoming State at init time
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1485207141-1941-3-git-send-email-quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-02-06 13:36:49 +01:00
Peter Maydell
7d2c6c9551 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170206-1' into staging
usb: various bugfixes, mostly xhci.

# gpg: Signature made Mon 06 Feb 2017 11:26:35 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170206-1:
  xhci: fix event queue IRQ handling
  usb: ccid: check ccid apdu length
  xhci: guard xhci_kick_epctx against recursive calls
  xhci: don't kick in xhci_submit and xhci_fire_ctl_transfer
  xhci: rename xhci_complete_packet to xhci_try_complete_packet
  xhci: only free completed transfers
  usb: accept usb3 control requests
  usb/uas: more verbose error message
  hw/usb/dev-hid: Improve guest compatibility of usb-tablet

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-06 11:44:08 +00:00
Gerd Hoffmann
7da76e12cc xhci: fix event queue IRQ handling
The qemu xhci emulation doesn't handle the ERDP_EHB flag correctly.

When the host adapter queues a new event the ERDP_EHB flag is set.  The
flag is cleared (via w1c) by the guest when it updates the ERDP (event
ring dequeue pointer) register to notify the host adapter which events
it has fetched.

An IRQ must be raised in case the ERDP_EHB flag flips from clear to set.
If the flag is set already (which implies there are events queued up
which are not yet processed by the guest) xhci must *not* raise a IRQ.

Qemu got that wrong and raised an IRQ on every event, thereby generating
spurious interrupts in case we've queued events faster than the guest
processed them.  This patch fixes that.

With that change in place we also have to check ERDP updates, to see
whenever the guest has fetched all queued events.  In case there are
still pending events set ERDP_EHB and raise an IRQ again, to make sure
the events don't linger unseen forever.

The linux kernel driver and the microsoft windows driver (shipped with
win8+) can deal with the spurious interrupts without problems.  The
renesas windows driver (v2.1.39) which can be used on older windows
versions is quite upset though.  It does spurious ERDP updates now and
then (not every time, seems we must hit a race window for this to
happen), which in turn makes the qemu xhci emulation think the event
ring is full.  Things go south from here ...

tl;dr: This is the "fix xhci on win7" patch.

Cc: M.Cerveny@computer.org
Cc: 1373228@bugs.launchpad.net
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486104705-13761-1-git-send-email-kraxel@redhat.com
2017-02-06 12:12:26 +01:00
Peter Maydell
c192325242 Merge remote-tracking branch 'remotes/dgibson/tags/isa-cleanup-20170206' into staging
Allow ISA to be disabled on some platforms (v3)

This makes some cleanups that are a start on allowing ISA to be
compiled out for platforms which don't use it.

I posted this series last November, and it collected a number of R-bs
and no apparent objections.  So, I've now rebased it (trivially) and
am sending a pull request in the hopes of merge.  A lot of the pieces
here don't have a clear maintainer, so I'm sending it directly to
Peter.

Notes:
  * Patch 3/3 triggers a style warning, but that's just because I'm
    moving a C++ // comment verbatim from one file to another

Changes since v2:
  * Trivial rebase

Changes since v1:
  * Fixed some silly compile errors in 3/3 exposed by some
    changes in other headers

# gpg: Signature made Mon 06 Feb 2017 01:37:50 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/isa-cleanup-20170206:
  Split ISA and sysbus versions of m48t59 device
  Allow ISA bus to be configured out
  Split serial-isa into its own config option

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-06 10:29:12 +00:00
Prasad J Pandit
c7dfbf3225 usb: ccid: check ccid apdu length
CCID device emulator uses Application Protocol Data Units(APDU)
to exchange command and responses to and from the host.
The length in these units couldn't be greater than 65536. Add
check to ensure the same. It'd also avoid potential integer
overflow in emulated_apdu_from_guest.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170202192228.10847-1-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-06 10:23:18 +01:00
Gerd Hoffmann
96d87bdda3 xhci: guard xhci_kick_epctx against recursive calls
Track xhci_kick_epctx processing being active in a variable.  Check the
variable before calling xhci_kick_epctx from xhci_kick_ep.  Add an
assert to make sure we don't call recursively into xhci_kick_epctx.

Cc: 1653384@bugs.launchpad.net
Fixes: 94b037f2a4
Reported-by: Fabian Lesniak <fabian@lesniak-it.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486035372-3621-1-git-send-email-kraxel@redhat.com
Message-id: 1485790607-31399-5-git-send-email-kraxel@redhat.com
2017-02-06 10:23:18 +01:00
Gerd Hoffmann
ddb603ab6c xhci: don't kick in xhci_submit and xhci_fire_ctl_transfer
xhci_submit and xhci_fire_ctl_transfer are is called from
xhci_kick_epctx processing loop only, so there is no need to call
xhci_kick_epctx make sure processing continues.  Also eecursive calls
into xhci_kick_epctx can cause trouble.

Drop the xhci_kick_epctx calls.

Cc: 1653384@bugs.launchpad.net
Fixes: 94b037f2a4
Reported-by: Fabian Lesniak <fabian@lesniak-it.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485790607-31399-4-git-send-email-kraxel@redhat.com
2017-02-06 10:23:18 +01:00
Gerd Hoffmann
13e8ff7abb xhci: rename xhci_complete_packet to xhci_try_complete_packet
Make clear that this isn't guaranteed to actually complete the transfer,
the usb packet can still be in flight after calling that function.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485790607-31399-3-git-send-email-kraxel@redhat.com
2017-02-06 10:23:17 +01:00
Gerd Hoffmann
f94d18d6c6 xhci: only free completed transfers
Most callsites check already, one was missed.

Cc: 1653384@bugs.launchpad.net
Fixes: 94b037f2a4
Reported-by: Fabian Lesniak <fabian@lesniak-it.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485790607-31399-2-git-send-email-kraxel@redhat.com
2017-02-06 10:23:17 +01:00
Gerd Hoffmann
811ad5d8f1 usb: accept usb3 control requests
Windows 10 reportedly sends these, so accept them in case
the device in question is a superspeed (usb3) device.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485870727-21956-2-git-send-email-kraxel@redhat.com
2017-02-06 10:23:17 +01:00
Gerd Hoffmann
e306b2fd3b usb/uas: more verbose error message
Print some more details in case we get a unknown
control request, to ease trouble-shooting.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485870727-21956-1-git-send-email-kraxel@redhat.com
2017-02-06 10:23:17 +01:00
Phil Dennis-Jordan
0cd089e937 hw/usb/dev-hid: Improve guest compatibility of usb-tablet
1. Set bInterfaceProtocol to 0x00 for usb-tablet. This should be
    non-zero for boot protocol devices only, which the usb-tablet is not.
 2. Set the usb-tablet's usage to "mouse" in the report descriptor.

The boot protocol of 0x02 specifically confused OS X/macOS' HID driver
stack, causing it to generate additional bogus HID events with relative
motion in addition to the tablet's absolute coordinate events.

Absolute pointing devices with HID Report Descriptor usage of 0x01
(pointing) are treated by the macOS HID driver as analog sticks, and
absolute coordinates are not directly translated to absolute mouse
cursor positions. Changing it to 0x02 (mouse) fixes the problem, and
does not have any adverse effect in other operating systems and
windowing systems. (VMWare does the same thing.)

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Message-id: 1485365075-32702-1-git-send-email-phil@philjordan.eu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-06 10:23:17 +01:00
David Gibson
c124c4d13b Split ISA and sysbus versions of m48t59 device
The m48t59 device supports both ISA and direct sysbus attached versions of
the device in the one .c file.  This can be awkward for some embedded
machine types which need the sysbus M48T59, but don't want to pull in the
ISA bus code and its other dependencies.

Therefore, this patch splits out the code for the ISA attached M48T59 into
its own C file.  It will be built when both CONFIG_M48T59 and
CONFIG_ISA_BUS are enabled.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-06 12:33:21 +11:00
David Gibson
1fc125f567 Allow ISA bus to be configured out
Currently, the code to handle the legacy ISA bus is always included in
qemu.  However there are lots of platforms that don't include ISA legacy
devies, and quite a few that have never used ISA legacy devices at all.

This patch allows the ISA bus code to be disabled in the configuration for
platforms where it doesn't make sense.

For now, the default configs are adjusted to include ISA on all platforms
including PCI: anything with PCI can at least in principle add an i82378
PCI->ISA bridge.  Also, CONFIG_IDE_CORE which is already in pci.mak
requires ISA support.

We also explicitly enable ISA on some other non-PCI platforms which include
ISA devices: moxie, sparc and unicore32.  We may want to pare this down in
future.

The platforms that will lose ISA by default are: cris, lm32, microblazeel,
microblaze, openrisc, s390x, tricore, xtensaeb, xtensa.  As far as I can
tell none of these ever used ISA.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-06 12:33:21 +11:00
David Gibson
1401c322c8 Split serial-isa into its own config option
At present, the core device model code for 8250-like serial ports
(serial.c) and the code for serial ports attached to ISA-style legacy IO
(serial-isa.c) are both controlled by the CONFIG_SERIAL variable.

There are lots and lots of embedded platforms that have 8250-like serial
ports but have never had anything resembling ISA legacy IO.  Therefore,
split serial-isa into its own CONFIG_SERIAL_ISA option so it can be
disabled for platforms where it's not appropriate.

For now, I enabled CONFIG_SERIAL_ISA in every default-config where
CONFIG_SERIAL is enabled, excepting microblaze, or32, and xtensa.  As best
as I can tell, those platforms never used legacy ISA, and also don't
include PCI support (which would allow connection of a PCI->ISA bridge
and/or a southbridge including legacy ISA serial ports).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-06 12:33:21 +11:00
Peter Maydell
a951316b8a Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Fri 03 Feb 2017 14:37:45 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  iothread: enable AioContext polling by default

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-03 14:41:49 +00:00
Stefan Hajnoczi
cdd7abfdba iothread: enable AioContext polling by default
IOThread AioContexts are likely to consist only of event sources like
virtqueue ioeventfds and LinuxAIO completion eventfds that are pollable
from userspace (without system calls).

We recently merged the AioContext polling feature but didn't enable it
by default yet.  I have gone back over the performance data on the
mailing list and picked a default polling value that gave good results.

Let's enable AioContext polling by default so users don't have another
switch they need to set manually.  If performance regressions are found
we can still disable this for the QEMU 2.9 release.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Karl Rister <krister@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20170126170119.27876-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-03 14:23:38 +00:00
Peter Maydell
4100a344eb Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20170202' into staging
Xen 2017/02/02

# gpg: Signature made Thu 02 Feb 2017 18:26:58 GMT
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <sstabellini@kernel.org>"
# gpg:                 aka "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20170202:
  xen: use qdev_unplug() instead of g_free() in xen_pv_find_xendev()
  MAINTAINERS: Update xen-devel mailing list address
  xen-platform: add missing disk unplug option
  xen-platform: add support for unplugging NVMe disks...
  xen-platform: re-structure unplug_disks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-03 12:31:40 +00:00
Stefan Weil
77e217d1bf tci: Remove invalid assertions
tb_jmp_insn_offset and tb_jmp_reset_offset are pointers
and cannot be used with ARRAY_SIZE.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 20170202195601.11286-1-sw@weilnetz.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-03 11:38:55 +00:00
Peter Maydell
5b66d7ae89 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20170202-2' into staging
cirrus: multiple bugfixes, including CVE-2017-2615 fix.

# gpg: Signature made Thu 02 Feb 2017 15:03:35 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vga-20170202-2:
  cirrus: fix oob access issue (CVE-2017-2615)
  cirrus: fix blit address mask handling
  cirrus: allow zero source pitch in pattern fill rops
  cirrus: handle negative pitch in cirrus_invalidate_region()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-03 09:52:51 +00:00
Peter Maydell
5459ef3bff Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170202' into staging
ppc patch queue 2017-02-02

This obsoletes ppc-for-2.9-20170112, which had a MacOS build bug.

This is a long overdue ppc pull request for qemu-2.9.  It's been a
long time coming due to some holidays and inconveniently timed
problems with testing.  So, there's a lot in here:

    * More POWER9 instruction implementations for TCG
    * The simpler parts of my CPU compatibility mode cleanup
        * This changes behaviour to prefer compatibility modes over
          "raW" mode for new machine type versions
    * New "40p" machine type which is essentially a modernized and
      cleaned up "prep".  The intention is that it will replace "prep"
      once it has some more testing and polish.
    * Add pseries-2.9 machine type
    * Implement H_SIGNAL_SYS_RESET hypercall
    * Consolidate the two alternate CPU init paths in pseries by
      making it always go through CPU core objects to initialize CPU
    * A number of bugfixes and cleanups
    * Stop the guest timebase when the guest is stopped under KVM.
      This makes the guest system clock also stop when paused, which
      matches the x86 behaviour.
    * Some preliminary cleanups leading towards implementation of the
      POWER9 MMU.

There are also some changes not strictly related to ppc code, but for
its benefit:

    * Limit the pxi-expander-bridge (PXB) device to x86 guests only
      (it's essentially a hack to work around historical x86
      limitations)
    * Some additions to the 128-bit math in host_utils, necessary for
      some of the new instructions.
    * Revise a number of qtests and enable them for ppc

# gpg: Signature made Thu 02 Feb 2017 01:40:16 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170202: (107 commits)
  hw/ppc/pnv: Use error_report instead of hw_error if a ROM file can't be found
  ppc/kvm: Handle the "family" CPU via alias instead of registering new types
  target/ppc/mmu_hash64: Fix incorrect shift value in amr calculation
  target/ppc/mmu_hash64: Fix printing unsigned as signed int
  tcg/POWER9: NOOP the cp_abort instruction
  target/ppc/debug: Print LPCR register value if register exists
  target-ppc: Add xststdc[sp, dp, qp] instructions
  target-ppc: Add xvtstdc[sp,dp] instructions
  target-ppc: Add MMU model check for booke machines
  ppc: switch to constants within BUILD_BUG_ON
  target/ppc/cpu-models: Fix/remove bad CPU aliases
  target/ppc: Remove unused POWERPC_FAMILY(POWER)
  spapr: clock should count only if vm is running
  ppc: Remove unused function cpu_ppc601_rtc_init()
  target/ppc: Add pcr_supported to POWER9 cpu class definition
  powerpc/cpu-models: rename ISAv3.00 logical PVR definition
  target-ppc: Add xvcv[hpsp, sphp] instructions
  target-ppc: Add xsmulqp instruction
  target-ppc: Add xsdivqp instruction
  target-ppc: Add xscvsdqp and xscvudqp instructions
  ...

# Conflicts:
#	hw/pci-bridge/Makefile.objs

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-02 18:48:06 +00:00
Juergen Gross
e9dcbc86d6 xen: use qdev_unplug() instead of g_free() in xen_pv_find_xendev()
The error exits of xen_pv_find_xendev() free the new xen-device via
g_free() which is wrong.

As the xen-device has been initialized as qdev it must be removed
via qdev_unplug().

This bug has been introduced with commit 3a6c9172ac
("xen: create qdev for each backend device").

Reported-by: Roger Pau Monné <roger.pau@citrix.com>
Tested-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-02-02 10:23:53 -08:00
Peter Maydell
4e9f5244e1 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Wed 01 Feb 2017 13:44:32 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace: clean up trace-events files
  qapi: add missing trace_visit_type_enum() call
  trace: improve error reporting when parsing simpletrace header
  trace: update docs to reflect new code generation approach
  trace: switch to modular code generation for sub-directories
  trace: move setting of group name into Makefiles
  trace: move hw/i386/xen events to correct subdir
  trace: move hw/xen events to correct subdir
  trace: move hw/block/dataplane events to correct subdir
  make: move top level dir to end of include search path

# Conflicts:
#	Makefile

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-02 16:08:28 +00:00
Peter Maydell
0b17d809b0 Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20170201' into staging
s390x fixes

- build error with old gcc versions
- race between cmma reset and rom/loader resets
- linux-user vs. cpu model

# gpg: Signature made Wed 01 Feb 2017 08:24:47 GMT
# gpg:                using RSA key 0x117BBC80B5A61C7C
# gpg: Good signature from "Christian Borntraeger (IBM) <borntraeger@de.ibm.com>"
# Primary key fingerprint: F922 9381 A334 08F9 DBAB  FBCA 117B BC80 B5A6 1C7C

* remotes/borntraeger/tags/s390x-20170201:
  target/s390x: use "qemu" cpu model in user mode
  s390x/kvm: fix small race reboot vs. cmma
  s390-pci: fix compilation on older GCC versions

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-02 15:14:24 +00:00
Li Qiang
62d4c6bd52 cirrus: fix oob access issue (CVE-2017-2615)
When doing bitblt copy in backward mode, we should minus the
blt width first just like the adding in the forward mode. This
can avoid the oob access of the front of vga's vram.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>

{ kraxel: with backward blits (negative pitch) addr is the topmost
          address, so check it as-is against vram size ]

Cc: qemu-stable@nongnu.org
Cc: P J P <ppandit@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Wolfgang Bumiller <w.bumiller@proxmox.com>
Fixes: d3532a0db0 (CVE-2014-8106)
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485938101-26602-1-git-send-email-kraxel@redhat.com
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
2017-02-02 15:58:23 +01:00
Peter Maydell
53761caf17 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Wed 01 Feb 2017 05:32:23 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  sheepdog: reorganize check for overlapping requests
  sheepdog: simplify inflight_aio_head management
  sheepdog: do not use BlockAIOCB
  sheepdog: reorganize coroutine flow
  sheepdog: remove unused cancellation support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-02 14:00:10 +00:00
Peter Maydell
e905587b75 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, vhost, pci: fixes, features

generic pci root port support
disable shpc by default
safer version of ARRAY_SIZE and QEMU_BUILD_BUG_ON
fixes and cleanups all over the place

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 01 Feb 2017 01:38:34 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (22 commits)
  arm: add trailing ; after MISMATCH_CHECK
  arm: better stub version for MISMATCH_CHECK
  hw/pci: disable pci-bridge's shpc by default
  vhost-user: delete chardev on cleanup
  vhost: skip ROM sections
  virtio: make virtio_should_notify static
  pci: Convert msix_init() to Error and fix callers
  hcd-xhci: check & correct param before using it
  msix: Follow CODING_STYLE
  hw/i386: check if nvdimm is enabled before plugging
  hw/pcie: Introduce Generic PCI Express Root Port
  hw/ioh3420: derive from PCI Express Root Port base class
  hw/pcie: Introduce a base class for PCI Express Root Ports
  intel_iommu: fix and simplify size calculation in process_device_iotlb_desc()
  pci: mark ROMs read-only
  ARRAY_SIZE: check that argument is an array
  compiler: expression version of QEMU_BUILD_BUG_ON
  compiler: rework BUG_ON using a struct
  QEMU_BUILD_BUG_ON: use __COUNTER__
  ppc: switch to constants within BUILD_BUG_ON
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-02 11:03:37 +00:00
Peter Maydell
2d6752d38d Merge remote-tracking branch 'remotes/elmarco/tags/chr-split-pull-request' into staging
# gpg: Signature made Tue 31 Jan 2017 19:32:40 GMT
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/chr-split-pull-request: (41 commits)
  char: headers clean-up
  char: move parallel chardev in its own file
  char: move serial chardev to its own file
  char: move pty chardev in its own file
  char: move pipe chardev in its own file
  char: move console in its own file
  char: move stdio in its own file
  char: move file chardev in its own file
  char: move udp chardev in its own file
  char: move socket chardev to its own file
  char: move win-stdio into its own file
  char: move win chardev base class in its own file
  char: move fd chardev in its own file
  char: move QIOChannel-related stuff to char-io.h
  char: remove unused READ_RETRIES
  char: rename and move to header CHR_READ_BUF_LEN
  char: move ringbuf/memory to its own file
  char: move mux to its own file
  char: move null chardev to its own file
  char: make null_chr_write() the default method
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-02 09:50:21 +00:00
Thomas Huth
7c6e879733 hw/ppc/pnv: Use error_report instead of hw_error if a ROM file can't be found
hw_error() is for CPU related errors only (it dumps the CPU registers
and  calls abort()!), so using error_report() is the better choice
of reporting an error in case we simply did not find a file.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:07 +11:00
Thomas Huth
715d4b96a4 ppc/kvm: Handle the "family" CPU via alias instead of registering new types
When running with KVM on POWER, we are registering a "family" CPU
type for the host CPU that we are running on. For example, on all
POWER8-compatible hosts, we register a "POWER8" CPU type, so that
you can always start QEMU with "-cpu POWER8" there, without the
need to know whether you are running on a POWER8, POWER8E or POWER8NVL
host machine.
However, we also have a "POWER8" CPU alias in the ppc_cpu_aliases list
(that is mainly useful for TCG). This leads to two cosmetical drawbacks:
If the user runs QEMU with "-cpu ?", we always claim that POWER8 is an
"alias for POWER8_v2.0" - which is simply not true when running with
KVM on POWER. And when using the 'query-cpu-definitions' QMP call,
there are currently two entries for "POWER8", one for the alias, and
one for the additional registered type.
To solve the two problems, we should rather update the "family" alias
instead of registering a new types. We then only have one "POWER8"
CPU definition around, an alias, which also points to the right
destination.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1396536
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:07 +11:00
Suraj Jitindar Singh
6925f12f4f target/ppc/mmu_hash64: Fix incorrect shift value in amr calculation
We are calculating the authority mask register key value wrong.

The pte entry contains the key value with the two upper bits and the three
lower bits stored separately. We should use these two portions to get a 5
bit value, not or them together which will only give us a 3 bit value.

Fix this.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:07 +11:00
Suraj Jitindar Singh
76134d48b3 target/ppc/mmu_hash64: Fix printing unsigned as signed int
We were printing an unsigned value as a signed value, fix this.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:07 +11:00
Suraj Jitindar Singh
b8b4576e09 tcg/POWER9: NOOP the cp_abort instruction
The cp_abort instruction is used to remove the state of an in progress
copy paste sequence. POWER9 compilers add this in various places, such
as context switches which causes illegal instruction signals since we
don't yet implement this instruction.

Given there is no implementation of the copy paste facility and that we
don't claim to support it, we can just noop this instruction.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:06 +11:00
Suraj Jitindar Singh
d801a61e98 target/ppc/debug: Print LPCR register value if register exists
It can be useful when debugging to print the LPCR value.

Thus we add the LPCR to the "info registers" output if the register had
been defined.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:06 +11:00
Nikunj A Dadhania
78241762c4 target-ppc: Add xststdc[sp, dp, qp] instructions
xststdcsp: VSX Scalar Test Data Class Single-Precision
xststdcdp: VSX Scalar Test Data Class Double-Precision
xststdcqp: VSX Scalar Test Data Class Quad-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:06 +11:00
Nikunj A Dadhania
403a884a40 target-ppc: Add xvtstdc[sp,dp] instructions
xvtstdcsp: VSX Vector Test Data Class Single-Precision
xvtstdcdp: VSX Vector Test Data Class Double-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:06 +11:00
Valentin Plotkin
00469dc373 target-ppc: Add MMU model check for booke machines
Machines bamboo, e500 and virtex-ml507 assume a certain MMU model,
otherwise resulting in unpredictable behavior. Add apropriate checks
into *_init functions.

Signed-off-by: Valentin Plotkin <caliborn@sdf.org>

[regarding virtex parts]
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-02 09:30:06 +11:00
Gerd Hoffmann
60cd23e851 cirrus: fix blit address mask handling
Apply the cirrus_addr_mask to cirrus_blt_dstaddr and cirrus_blt_srcaddr
right after assigning them, in cirrus_bitblt_start(), instead of having
this all over the place in the cirrus code, and missing a few places.

Reported-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485338996-17095-1-git-send-email-kraxel@redhat.com
2017-02-01 09:47:22 +01:00
Wolfgang Bumiller
5858dd1801 cirrus: allow zero source pitch in pattern fill rops
The rops used by cirrus_bitblt_common_patterncopy only use
the destination pitch, so the source pitch shoul allowed to
be zero and the blit with used for the range check around the
source address.

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Message-id: 1485272138-23249-1-git-send-email-w.bumiller@proxmox.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-01 09:43:09 +01:00
Wolfgang Bumiller
f153b563f8 cirrus: handle negative pitch in cirrus_invalidate_region()
cirrus_invalidate_region() calls memory_region_set_dirty()
on a per-line basis, always ranging from off_begin to
off_begin+bytesperline. With a negative pitch off_begin
marks the top most used address and thus we need to do an
initial shift backwards by a line for negative pitches of
backward blits, otherwise the first iteration covers the
line going from the start offset forwards instead of
backwards.
Additionally since the start address is inclusive, if we
shift by a full `bytesperline` we move to the first address
*not* included in the blit, so we only shift by one less
than bytesperline.

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Message-id: 1485352137-29367-1-git-send-email-w.bumiller@proxmox.com

[ kraxel: codestyle fixes ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-01 09:41:53 +01:00
David Hildenbrand
d8923bc754 target/s390x: use "qemu" cpu model in user mode
"any" does not exist, therefore resulting in a misleading error message.

Reported-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170130145025.26475-1-david@redhat.com>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Alexander Graf <agraf@suse.de>
Cc: qemu-stable@nongnu.org
2017-02-01 09:15:17 +01:00
Christian Borntraeger
1a0e4c8b02 s390x/kvm: fix small race reboot vs. cmma
Right now we reset all devices before we reset the cmma states.  This
can result in the host kernel discarding guest pages that were
previously in the unused state but already contain a bios or a -kernel
file before the cmma reset has finished.  This race results in random
guest crashes or hangs during very early reboot.

Fixes: 1cd4e0f6f0 ("s390x/cmma: clean up cmma reset")
Cc: qemu-stable@nongnu.org
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-02-01 09:11:56 +01:00
Paolo Bonzini
2034ee5152 s390-pci: fix compilation on older GCC versions
S390PCIBusDevice is typedef'ed earlier in the file, before the hunks
that this patch modifies.  The double typedef causes old versions of
GCC to complain.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1485523252-88288-1-git-send-email-pbonzini@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-02-01 09:11:56 +01:00
Paolo Bonzini
acf6e5f096 sheepdog: reorganize check for overlapping requests
Wrap the code that was copied repeatedly in the two functions,
sd_aio_setup and sd_aio_complete.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20161129113245.32724-6-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-01 00:17:20 -05:00
Paolo Bonzini
c4080e9391 sheepdog: simplify inflight_aio_head management
Add to the list in add_aio_request and, indirectly, resend_aioreq.  Inline
free_aio_req in the caller, it does not simply undo alloc_aio_req's job.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20161129113245.32724-5-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-01 00:17:20 -05:00
Paolo Bonzini
28ddd08cd6 sheepdog: do not use BlockAIOCB
Sheepdog's AIOCB are completely internal entities for a group of
requests and do not need dynamic allocation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20161129113245.32724-4-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-01 00:17:20 -05:00
Paolo Bonzini
e80ab33dc0 sheepdog: reorganize coroutine flow
Delimit co_recv's lifetime clearly in aio_read_response.

Do a simple qemu_coroutine_enter in aio_read_response, letting
sd_co_writev call sd_write_done.

Handle nr_pending in the same way in sd_co_rw_vector,
sd_write_done and sd_co_flush_to_disk.

Remove sd_co_rw_vector's return value; just leave with no
pending requests.

[Jeff: added missing 'return' back, spotted by Paolo after
       series was applied.]

Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-01 00:17:20 -05:00
Paolo Bonzini
a71264f9f5 sheepdog: remove unused cancellation support
SheepdogAIOCB is internal to sheepdog.c, hence it is never canceled.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20161129113245.32724-2-pbonzini@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-01 00:17:20 -05:00
Michael S. Tsirkin
1b28762a33 arm: add trailing ; after MISMATCH_CHECK
Macro calls without a trailing ; look weird in C, this works as a side
effect of how QEMU_BUILD_BUG_ON is implemented. Fix this up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-01 03:37:18 +02:00
Michael S. Tsirkin
705ae59fec arm: better stub version for MISMATCH_CHECK
stub version of MISMATCH_CHECK is empty so it's easy to misuse for
people not building kvm on arm.  Use QEMU_BUILD_BUG_ON similar to the
non-stub version to make it easier to catch bugs.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-01 03:37:18 +02:00
Marcel Apfelbaum
dc0ae76770 hw/pci: disable pci-bridge's shpc by default
The shpc component is optional while  ACPI hotplug is used
for hot-plugging PCI devices into a PCI-PCI bridge.
Disabling the shpc by default will make slot 0 usable at boot time
and not only for hot-plug, without loosing any functionality.
Older machines will have shpc enabled for compatibility reasons.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-01 03:37:18 +02:00
Marc-André Lureau
e0b283e7c5 vhost-user: delete chardev on cleanup
Remove the chardev implicitly when cleaning up the netdev. This
prevents from reusing the chardev since it would be in an incorrect
state with the slave.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1256618

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-01 03:37:18 +02:00
Michael S. Tsirkin
d56ec1e98c vhost: skip ROM sections
vhost does not support RO protections on memory at the moment - adding
ROMs would mean that e.g. a buggy guest might change them in-memory - a
condition from which guest reset does not recover. Not nice.

We also definitely don't want to try logging writes into ROMs -
in particular guests set very high addresses for ROM BARs
so logging these writes would waste a lot of memory.

Maybe ROMs could be supported with the iotlb variant -
not sure, but there seems to be no good reason for virtio
to try to do DMA from ROM. So let's just skip ROM memory.

Suggested-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
2017-02-01 03:37:18 +02:00
Paolo Bonzini
c25d97c4ff virtio: make virtio_should_notify static
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-01 03:37:18 +02:00
Cao jin
ee640c625e pci: Convert msix_init() to Error and fix callers
msix_init() reports errors with error_report(), which is wrong when
it's used in realize().  The same issue was fixed for msi_init() in
commit 1108b2f. In order to make the API change as small as possible,
leave the return value check to later patch.

For some devices(like e1000e, vmxnet3, nvme) who won't fail because of
msix_init's failure, suppress the error report by passing NULL error
object.

Bonus: add comment for msix_init.

CC: Jiri Pirko <jiri@resnulli.us>
CC: Gerd Hoffmann <kraxel@redhat.com>
CC: Dmitry Fleytman <dmitry@daynix.com>
CC: Jason Wang <jasowang@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Hannes Reinecke <hare@suse.de>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Alex Williamson <alex.williamson@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-01 03:37:18 +02:00
Cao jin
20729dbd01 hcd-xhci: check & correct param before using it
usb_xhci_realize() corrects invalid values of property "intrs"
automatically, but the uncorrected value is passed to msi_init(),
which chokes on invalid values.  Delay that until after the
correction.

Resources allocated by usb_xhci_init() are leaked when msi_init()
fails.  Fix by calling it after msi_init().

CC: Gerd Hoffmann <kraxel@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-01 03:37:18 +02:00
Cao jin
9348243687 msix: Follow CODING_STYLE
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-01 03:37:17 +02:00
Haozhong Zhang
e987c37aee hw/i386: check if nvdimm is enabled before plugging
The missing of 'nvdimm' in the machine type option '-M' means NVDIMM
is disabled. QEMU should refuse to plug any NVDIMM device in this case
and report the misconfiguration.

The behavior of NVDIMM on unsupported platform (HW/FW) is vendor
specific. For some vendors, it's undefined and the platform may do
anything. Thus, I think QEMU is free to choose the implementation.
Aborting QEMU (i.e. refusing to boot) is the easiest one.

Reported-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Message-Id: 20170112110928.GF4621@stefanha-x1.localdomain
Message-Id: 20170111093630.2088-1-stefanha@redhat.com
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-01 03:37:17 +02:00
Marcel Apfelbaum
f7d6f3fac8 hw/pcie: Introduce Generic PCI Express Root Port
The Generic Root Port behaves almost the same as the
Intel's IOH device with id 3420, without having
Intel specific attributes.

The device has two purposes:
 (1) Can be used on both X86 and ARM machines.
 (2) It will allow us to tweak the behaviour
    (e.g add vendor-specific PCI capabilities)
     - something that obviously cannot be done
       on a known device.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Andrea Bolognani <abologna@redhat.com>
2017-02-01 03:37:17 +02:00
Marcel Apfelbaum
fed23cb4e8 hw/ioh3420: derive from PCI Express Root Port base class
Preserve only Intel specific details.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-01 03:37:17 +02:00
Marcel Apfelbaum
9d5154d753 hw/pcie: Introduce a base class for PCI Express Root Ports
The 'base' PCI Express Root Port includes
the common code to be re-used for all
Root Ports implementations. Most of the code
was taken from the current implementation
of Intel's IOH 3420 Root Port.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-01 03:37:17 +02:00
Jason Wang
04eb6247eb intel_iommu: fix and simplify size calculation in process_device_iotlb_desc()
We don't use 1ULL which is wrong during size calculation. Fix it, and
while at it, switch to use cto64() and adds a comments to make it
simpler and easier to be understood.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-01 03:37:17 +02:00
Michael S. Tsirkin
ec42813028 pci: mark ROMs read-only
Looks like we didn't mark PCI ROMs as RO allowing
mischief such as guests writing there.
Further, e.g. vhost gets confused trying to allocate
enough space to log writes there. Fix it up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
2017-02-01 03:37:17 +02:00
Michael S. Tsirkin
ed63ec0d22 ARRAY_SIZE: check that argument is an array
It's a familiar pattern: some code uses ARRAY_SIZE, then refactoring
changes the argument from an array to a pointer to a dynamically
allocated buffer.  Code keeps compiling but any ARRAY_SIZE calls now
return the size of the pointer divided by element size.

Let's add build time checks to ARRAY_SIZE before we allow more
of these in the code-base.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-01 03:37:17 +02:00
Michael S. Tsirkin
d757573e69 compiler: expression version of QEMU_BUILD_BUG_ON
QEMU_BUILD_BUG_ON uses a typedef in order to be safe
to use outside functions, but sometimes it's useful
to have a version that can be used within an expression.
Following what Linux does, introduce QEMU_BUILD_BUG_ON_ZERO
that return zero after checking condition at build time.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-02-01 03:37:17 +02:00
Michael S. Tsirkin
f291887e8e compiler: rework BUG_ON using a struct
There are theoretical concerns that some compilers might not trigger
build failures on attempts to define an array of size (x ? -1 : 1) where
x is a variable and make it a variable sized array instead. Let rewrite
using a struct with a negative bit field size instead as there are no
dynamic bit field sizes.  This is similar to what Linux does.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-02-01 03:37:17 +02:00
Michael S. Tsirkin
60abf0a5e0 QEMU_BUILD_BUG_ON: use __COUNTER__
Some headers use QEMU_BUILD_BUG_ON. This causes a problem
if the C file including that header happens to have
QEMU_BUILD_BUG_ON at the same line number.

Fix using a widely available extension: __COUNTER__.
If unavailable, provide a stub.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-01 03:37:17 +02:00
Michael S. Tsirkin
32f825dece ppc: switch to constants within BUILD_BUG_ON
We are switching BUILD_BUG_ON to verify that it's parameter is a
compile-time constant, and it turns out that some gcc versions
(specifically gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609) are
not smart enough to figure it out for expressions involving local
variables. This is harmless but means that the check is ineffective for
these platforms.  To fix, replace the variable with macros.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-01 03:37:17 +02:00
Marc-André Lureau
213dcb060f char: headers clean-up
Those could probably be squashed with earlier patches, however I
couldn't easily identify them, test them or check if there are still
necessary on various platforms.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:22 +04:00
Marc-André Lureau
6aa0f0c900 char: move parallel chardev in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
2b2f23dae7 char: move serial chardev to its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
c3b7d62097 char: move pty chardev in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
1fcb3841d0 char: move pipe chardev in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
7c7b2db0bd char: move console in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
d180081525 char: move stdio in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
6fdafac1c1 char: move file chardev in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
3b4482a26d char: move udp chardev in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
d24ca4b8c5 char: move socket chardev to its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
0e58f17777 char: move win-stdio into its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
503ebefe0a char: move win chardev base class in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
894593afbe char: move fd chardev in its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:21 +04:00
Marc-André Lureau
a6da7ffa38 char: move QIOChannel-related stuff to char-io.h
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Marc-André Lureau
09fbe4e3e1 char: remove unused READ_RETRIES
Curiously unused since its introduction in commit 7b0bfdf52d.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Marc-André Lureau
f612143a03 char: rename and move to header CHR_READ_BUF_LEN
This define is used by several character devices, place it in char
common header.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Marc-André Lureau
bf51f62869 char: move ringbuf/memory to its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Marc-André Lureau
df85a78bf8 char: move mux to its own file
A mechanical move, except that qemu_chr_write_all() needs to be declared
in char.h header to be used from chardev unit files.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Marc-André Lureau
247c92af2b char: move null chardev to its own file
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Marc-André Lureau
eb314a9497 char: make null_chr_write() the default method
All chardev must implement chr_write(), but parallel and null chardev
both use null_chr_write(). Move it to the base class, so we don't need
to export the function when splitting the chardev in respective files.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Marc-André Lureau
32d955a422 char: create chardev-obj-y
This will help to split char.c in several units without having to
reference them all everywhere. This is useful in particular for tests.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 23:31:20 +04:00
Peter Maydell
6fe791b5e3 Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170131-2' into staging
ui: bugfixes and small improvements all over the place.

# gpg: Signature made Tue 31 Jan 2017 15:48:20 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170131-2:
  console: fix console resize
  gtk: Hardcode LC_CTYPE as C.utf-8
  vnc: fix overflow in vnc_update_stats
  spice: wakeup QXL worker to pick up mouse changes
  ui/gtk.c: add ctrl-alt-= support for zoom in acceleration
  ui: fix format specfier in vnc to avoid break in build.
  ui/gtk: Fix mouse wheel on 3.4.0 or later
  vnc: track LED state separately
  ui: add support for mice with extra/side buttons
  ps2: add support for mice with extra/side buttons
  qapi: add support for mice with extra/side buttons

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-01-31 18:41:33 +00:00
Stefan Hajnoczi
7f4076c1bb trace: clean up trace-events files
There are a number of unused trace events that
scripts/cleanup-trace-events.pl finds.  The "hw/vfio/pci-quirks.c"
filename was typoed and "qapi/qapi-visit-core.c" was missing the qapi/
directory prefix.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170126171613.1399-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:12:15 +00:00
Stefan Hajnoczi
6514532f73 qapi: add missing trace_visit_type_enum() call
A trace event exists for enums but it's never called.  This patch fixes
this oversight so that enums are traced just like the other QAPI types.

Suggested-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170126171613.1399-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:28 +00:00
Daniel P. Berrange
25d54654da trace: improve error reporting when parsing simpletrace header
When loading a simpletrace binary file we just report
"Not a valid trace file!" which is not very helpful. Report
exactly which field we found to be invalid.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170125161417.31949-9-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:18 +00:00
Daniel P. Berrange
d4fa8436ce trace: update docs to reflect new code generation approach
Describe use of per-subdir trace events files and how it impacts
code generation.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170125161417.31949-8-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:18 +00:00
Daniel P. Berrange
0ab8ed18a6 trace: switch to modular code generation for sub-directories
Introduce rules in the top level Makefile that are able to generate
trace.[ch] files in every subdirectory which has a trace-events file.

The top level directory is handled specially, so instead of creating
trace.h, it creates trace-root.h. This allows sub-directories to
include the top level trace-root.h file, without ambiguity wrt to
the trace.g file in the current sub-dir.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170125161417.31949-7-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:18 +00:00
Daniel P. Berrange
2098c56a9b trace: move setting of group name into Makefiles
Having tracetool.py figure out the right group name from just
the input filename is not practical when considering the
different build vs src path combinations. Instead simply take
the group name as a command line arg from the Makefile, which
can trivially provide the right name.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170125161417.31949-6-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:18 +00:00
Daniel P. Berrange
9c5826306d trace: move hw/i386/xen events to correct subdir
The trace-events for a given source file should generally
always live in the same directory as the source file.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170125161417.31949-5-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:18 +00:00
Daniel P. Berrange
1416f9ea6d trace: move hw/xen events to correct subdir
The trace-events for a given source file should generally
always live in the same directory as the source file.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170125161417.31949-4-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:17 +00:00
Daniel P. Berrange
de928314aa trace: move hw/block/dataplane events to correct subdir
The trace-events for a given source file should generally
always live in the same directory as the source file.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170125161417.31949-3-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:17 +00:00
Daniel P. Berrange
ba78db44f6 make: move top level dir to end of include search path
Currently the search path is

  1. source dir corresponding to input file (implicit by compiler)
  2. top level build dir
  3. top level source dir
  4. top level source include/ dir
  5. source dir corresponding to input file
  6. build dir corresponding to output file

Search item 5 is an effective no-op, since it duplicates item 1.
When srcdir == builddir, item 6 also duplicates item 1, which
causes a semantic difference between VPATH and non-VPATH builds.

Thus to ensure consistent semantics we need item 6 to be present
immediately after item 1. e.g.

  1. source dir corresponding to input file (implicit by compiler)
  2. build dir corresponding to output file
  3. top level build dir
  4. top level source dir
  5. top level source include/ dir

When srcdir == builddir, items 1 & 2 collapse into one, and items
3 & 4 collapse into one, but the overall search order is still
consistent with srcdir != builddir

A further complication is that while most of the source files
are built with a current directory of $BUILD_DIR, target dependant
files are built with a current directory of $BUILD_DIR/$TARGET.

As a result, search item 2 resolves to a different location for
target independant vs target dependant files. For example when
building 'migration/ram.o', the use of '-I$(@D)' (which expands
to '-Imigration') would not find '$BUILD_DIR/migration', but
rather '$BUILD_DIR/$TARGET/migration'.

If there are generated headers files to be used by the migration
code in '$BUILD_DIR/migration', these will not be found by the
relative include, an absolute include is needed instead. This
has not been a problem so far, since nothing has been generating
headers in sub-dirs, but the trace code will shortly be doing
that. So it is needed to list '-I$(BUILD_DIR)/$(@D)' as well as
'-I$(@D)' to ensure both directories are searched when building
target dependant code. So the search order ends up being:

  1. source dir corresponding to input file (implicit by compiler)
  2. build dir corresponding to output file (absolute)
  3. build dir corresponding to output file (relative to cwd)
  4. top level build dir
  5. top level source dir
  6. top level source include/ dir

One final complication is that the absolute '-I$(BUILD_DIR)/$(@D)'
will sometimes end up pointing to a non-existant directory if
that sub-dir does not have any target-independant files to be
built. Rather than try to dynamically filter this, a simple
'mkdir' ensures $(BUILD_DIR)/$(@D) is guaranteed to exist at
all times.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170125161417.31949-2-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-31 17:11:17 +00:00
Michael S. Tsirkin
df45892c12 qxl: switch to constants within BUILD_BUG_ON
We are switching BUILD_BUG_ON to verify that it's parameter is a
compile-time constant, and it turns out that some gcc versions
(specifically gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609) are
not smart enough to figure it out for expressions involving local
variables. This is harmless but means that the check is ineffective for
these platforms.  To fix, replace variables with macros.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 15:57:27 +02:00
Michael S. Tsirkin
f298318284 compiler: drop ; after BUILD_BUG_ON
All users include the trailing ; anyway, let's require that -
it seems cleaner.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-01-31 15:57:27 +02:00
Marc-André Lureau
178fe0ae9d char: move to chardev/
The following commits will split char.c in several files. Let's put them
in a subdirectory.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:03:46 +04:00
Marc-André Lureau
0b663b7d77 char: remove class kind field
The class kind is necessary to lookup the chardev name in
qmp_chardev_add() after calling qemu_chr_new_from_opts() and to set
the appropriate ChardevBackend (mainly to free the right
fields).

qemu_chr_new_from_opts() can be changed to use a non-qmp function
using the chardev class typename. Introduce qemu_chardev_add() to be
called from qemu_chr_new_from_opts() and remove the class chardev kind
field. Set the backend->type in the parse callback (when non-common
fields are added).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:03:42 +04:00
Marc-André Lureau
279b066e4c char: rename remaining CharDriver to Chardev
CharDriver no longer exists, it has been replaced with Chardev.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
88cace9f11 char: get rid of CharDriver
qemu_chr_new_from_opts() is modified to not need CharDriver backend[]
array, but uses instead objectified qmp_query_chardev_backends() and
char_get_class(). The alias field is moved outside in a ChardevAlias[],
similar to QDevAlias for devices.

"kind" and "parse" are moved to ChardevClass ("kind" is to be removed
next)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
8cddc46990 char: remove chr_free
Now it uses Object instance_finalize instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
7f2fe073f3 char-fd: convert to finalize
char-serial inherits from char-fd finalizer.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
53a5736f94 char-win: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
c266d94e7b char-win: do not override chr_free
For some unclear reason to me, char-file does not have chr_free on
win32. Since we want to switch to instance finalizer instead of class
chr_free, we should be able to run the base WinChardev class finalizer
in any case. Use a boolean to skip free to ease the transition to
instance finalizer.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
1566b0c455 char-win-stdio: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
4d833ada52 char-stdio: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
c930572883 char-parallel: convert parallel to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:47 +04:00
Marc-André Lureau
9fa2f7a4ed char-ringbuf: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
fa943b5ea0 char-pty: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
2c3a5dcbf8 char-socket: convert to finalize
Notice that finalize() will be run after a failure to open(), so cleanup
code must be adjusted.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
819aad230a char-udp: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
980d0414ce mux: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
8955e8914c msmouse: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
e96ebf494a baum: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
18c508acdb spice-qemu-char: convert to finalize
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-01-31 13:01:46 +04:00
Marc-André Lureau
55fc84a7a3 MAINTAINERS: add myself to qemu-char.c
I consider to have enough experience with qemu-char to propose myself as
maintainer. This will allow me to send pull request without waiting for
Paolo.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2017-01-31 13:00:10 +04:00
Michael S. Tsirkin
25e6a11832 ppc: switch to constants within BUILD_BUG_ON
We are switching BUILD_BUG_ON to verify that it's parameter is a
compile-time constant, and it turns out that some gcc versions
(specifically gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609) are
not smart enough to figure it out for expressions involving local
variables. This is harmless but means that the check is ineffective for
these platforms.  To fix, replace the variable with macros.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
[dwg: Correct a printf format warning]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 14:04:06 +11:00
Thomas Huth
c636367311 target/ppc/cpu-models: Fix/remove bad CPU aliases
There is no CPU model called "7447_v1.2" in our list, so the
"7447" alias should point to "7447_v1.1" instead. Let's also
remove the "codename" aliases that point to non-implemented
CPU models - they are really of no use here.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 13:46:26 +11:00
Thomas Huth
6a07692ffa target/ppc: Remove unused POWERPC_FAMILY(POWER)
We do not support POWER1 CPUs in QEMU, so it does not make sense
to keep this stub around.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 13:46:26 +11:00
Laurent Vivier
42043e4f12 spapr: clock should count only if vm is running
This is a port to ppc of the i386 commit:
    00f4d64 kvmclock: clock should count only if vm is running

We remove timebase_post_load function, and use the VM state
change handler to save and restore the guest_timebase (on stop
and continue).

We keep timebase_pre_save to reduce the clock difference on
migration like in:
    6053a86 kvmclock: reduce kvmclock difference on migration

Time base offset has originally been introduced by commit
    98a8b52 spapr: Add support for time base offset migration

So while VM is paused, the time is stopped. This allows to have
the same result with date (based on Time Base Register) and
hwclock (based on "get-time-of-day" RTAS call).

Moreover in TCG mode, the Time Base is always paused, so this
patch also adjust the behavior between TCG and KVM.

VM state field "time_of_the_day_ns" is now useless but we keep
it to be able to migrate to older version of the machine.

As vmstate_ppc_timebase structure (with timebase_pre_save() and
timebase_post_load() functions) was only used by vmstate_spapr,
we register the VM state change handler only in ppc_spapr_init().

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Thomas Huth
d9d6e78ea8 ppc: Remove unused function cpu_ppc601_rtc_init()
It is completely unused, thus it can be removed without problems.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Suraj Jitindar Singh
216c944eeb target/ppc: Add pcr_supported to POWER9 cpu class definition
pcr_supported is used to define the supported PCR values for a given
processor. A POWER9 processor can support 3.00, 2.07, 2.06 and 2.05
compatibility modes, thus we set this accordingly.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Suraj Jitindar Singh
c104949f64 powerpc/cpu-models: rename ISAv3.00 logical PVR definition
This logical PVR value now corresponds to ISA version 3.00 so rename it
accordingly.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
8b920d8abc target-ppc: Add xvcv[hpsp, sphp] instructions
xvcvhpsp: VSX Vector Convert Half Precision to Single Precision
xvcvsphp: VSX Vector Convert Single Precision to Half Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
a811ec0491 target-ppc: Add xsmulqp instruction
xsmulqp: VSX Scalar Multiply Quad-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
314c116347 target-ppc: Add xsdivqp instruction
xsdivqp: VSX Scalar Divide Quad-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
48ef23cb26 target-ppc: Add xscvsdqp and xscvudqp instructions
xscvsdqp: VSX Scalar Convert Signed Doubleword format to
          Quad-Precision format
xscvudqp: VSX Scalar Convert Unsigned Doubleword format to
          Quad-Precision format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
c6d8c5ba5a target-ppc: Use ppc_vsr_t.f128 in xscmp[o,u,exp]qp
xscmpoqp, xscmpuqp & xscmpexpqp were added before f128 field was
introduced in ppc_vsr_t. Now that we have it, use it instead of
generating the 128 bit float using two 64bit fields.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
5c32e2e4a0 ppc: Implement bcdutrunc. instruction
bcdutrunc. Decimal unsigned truncate. Works like bcdtrunc. with
unsigned BCD numbers.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
31bc4d114a ppc: Implement bcdtrunc. instruction
bcdtrunc.: Decimal integer truncate. Given a BCD number in vrb and the
number of bytes to truncate in vra, the return register will have vrb
with such bits truncated.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Hervé Poussineau
8178e89cbc ppc/prep: update MAINTAINERS file
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
05590b9252 target-ppc: Add xscvqps[d,w]z instructions
xscvqpsdz: VSX Scalar truncate & Convert Quad-Precision format to
           Signed Doubleword format
xscvqpswz: VSX Scalar truncate & Convert Quad-Precision format to
           Signed Word format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
cf9465a166 target-ppc: Add xvxsigdp instruction
xvxsigdp: VSX Vector Extract Significand Dual Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
c5969d2eb1 target-ppc: Add xvxsigsp instruction
xvxsigsp: VSX Vector Extract Significand Single Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
46804e2875 target-ppc: Add xvxexpdp instruction
xvxexpdp: VSX Vector Extract Exponent Dual Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
08f1ee5a09 target-ppc: Add xvxexpsp instruction
xvxexpsp: VSX Vector Extract Exponent Single Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
e385e4b7db target-ppc: Add xviexpdp instruction
xviexpdp: VSX Vector Insert Exponent Dual Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
d9031405a7 target-ppc: Add xviexpsp instruction
xviexpsp: VSX Vector Insert Exponent Single Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
8a9472ec38 target-ppc: Add xsiexpqp instruction
xsiexpqp: VSX Scalar Insert Exponent Quad Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
1b8d663d62 target-ppc: Add xsiexpdp instruction
xsiexpdp: VSX Scalar Insert Exponent Double Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
a54238adac ppc: Implement bcdsr. instruction
bcdsr.: Decimal shift and round. This instruction works like bcds.
however, when performing right shift, 1 will be added to the
result if the last digit was >= 5.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
a49a95e9e4 ppc: Implement bcdus. instruction
bcdus.: Decimal unsigned shift. This instruction works like bcds. but
considers only unsigned BCDs (no sign in least meaning 4 bits).

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
e04797f79e ppc: Implement bcds. instruction
bcds.: Decimal shift. Given two registers vra and vrb, this instruction
shift the vrb value by vra bits into the result register.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
f539fbe337 host-utils: Implement unsigned quadword left/right shift and unit tests
Implements 128-bit left shift and right shift as well as their
testcases. By design, shift silently mods by 128, so the caller is
responsible to assert the shift range if necessary.

Left shift sets the overflow flag if any non-zero digit is shifted out.

Examples:
 ulshift(&low, &high, 250, &overflow);
 equivalent: n << 122

 urshift(&low, &high, -2);
 equivalent: n << 126

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[dwg: Added test-shift128 to .gitignore]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
6758c192b0 host-utils: Move 128-bit guard macro to .c file
It is not possible to implement functions in host-utils.c for
architectures with quadwords because the guard is implemented in the
Makefile. This patch move the guard out of the Makefile to the
implementation file.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
5d51eaea84 softfloat: Fix the default qNAN for target-ppc
Currently float128_default_nan() returns 0xFFFF800000000000 in the
higher double word, but it should return 0x7FFF800000000000 which
is the correct higher double word for default qNAN on PowerPC.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
c3e4293ac9 target-ppc: xscvqpdp zero VSR
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Jose Ricardo Ziviani
365206aeb3 ppc: Fix a warning in bcdcfz code and improve BCD_DIG_BYTE macro
This commit fixes a warning in the code "(i * 2) ? .. : ..", which
should be better as "i ? .. : ..", and improves the BCD_DIG_BYTE
macro by placing parentheses around its argument to avoid possible
expansion issues like: BCD_DIG_BYTE(i + j).

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Roman Kapl
0dfe952dc5 ppc: Prevent inifnite loop in decrementer auto-reload.
If the DECAR register is set to 0, QEMU tries to reload the decrementer with
zero in an inifinite loop. According to PPC documentation, the decrementer is
triggered on 1->0 transition, so avoid reloading the decrementer if if is
already zero.

The problem does not manifest under Linux, but it is valid to set DECAR to zero
(and may make sense as part of decrementer initialization when interrupts are
disabled).

Signed-off-by: Roman Kapl <rka@sysgo.com>
[dwg: Fixed style nit]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
2a084dadcb target-ppc: Add xscvqpdp instruction
xscvqpdp:  VSX Scalar round & Convert Quad-Precision format to
           Double-Precision format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
e548780359 target-ppc: Add xscvdpqp instruction
xscvdpqp: VSX Scalar Convert Double-Precision format to
          Quad-Precision format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
07bdd2478b target-ppc: Add xsaddqp instructions
xsaddqp:  VSX Scalar Add Quad-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
David Gibson
f6f242c757 ppc: Add ppc_set_compat_all()
Once a compatiblity mode is negotiated with the guest,
h_client_architecture_support() uses run_on_cpu() to update each CPU to
the new mode.  We're going to want this logic somewhere else shortly,
so make a helper function to do this global update.

We put it in target-ppc/compat.c - it makes as much sense at the CPU level
as it does at the machine level.  We also move the cpu_synchronize_state()
into ppc_set_compat(), since it doesn't really make any sense to call that
without synchronizing state.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
David Gibson
152ef803ce pseries: Rewrite CAS PVR compatibility logic
During boot, PAPR guests negotiate CPU model support with the
ibm,client-architecture-support mechanism.  The logic to implement this in
qemu is very convoluted.  This cleans it up to be cleaner, using the new
ppc_check_compat() call.

The new logic for choosing a compatibility mode is:
    1. Usually, use the most recent compatibility mode that is
            a) supported by the guest
            b) supported by the CPU
        and c) no later than the maximum allowed (if specified)
    2. If no suitable compatibility mode was found, the guest *does*
       support this CPU explicitly, and no maximum compatibility mode is
       specified, then use "raw" mode for the current CPU
    3. Otherwise, fail the boot.

This differs from the results of the old code: the old code preferred using
"raw" mode to a compatibility mode, whereas the new code prefers a
compatibility mode if available.  Using compatibility mode preferentially
means that we're more likely to be able to migrate the guest to a similar
but not identical host.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
David Gibson
ef29122649 pxb: Restrict to x86
The PCI Expander Bridge (PXB) device is essentially a hack to allow
different PCIe devices to be assigned to different NUMA nodes on x86.  Each
PXB is sort-of a separate PCI host bridge, except that its config space
is shared with the config space of the main PCI host bridge, rather than
being independent.

This is only necessary if the platform doesn't (easily) allow truly
independent PCI host bridges.  AFAIK that's just x86.

This patch makes it possible to configure PXB out of the build, and adjusts
the default configs so it's only included on x86 targets.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
29f8ddb72f target-ppc: Add xsxsigqp instructions
xsxsigqp: VSX Scalar Extract Significand Quad Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
05538220ac target-ppc: Add xsxsigdp instruction
xsxsigdp: VSX Scalar Extract Significand Dual Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
9eceae320e target-ppc: Add xsxexpqp instruction
xsxexpqp: VSX Scalar Extract Exponent Quad Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Nikunj A Dadhania
08e149869e target-ppc: Add xsxexpdp instruction
xsxexpdp: VSX Scalar Extract Exponent Dual Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
9aeae8e16e target-ppc: Use correct precision for FPRF setting
Use correct FP precision when setting FPRF in FP conversion helpers
instead of always assuming float64 precision.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
f566c0474a target-ppc: Add xscvdphp, xscvhpdp
xscvdphp: VSX Scalar round & Convert Double-Precision format to
          Half-Precision format
xscvhpdp: VSX Scalar Convert Half-Precision format to
          Double-Precision format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
ffc67420f9 target-ppc: Rename helper_compute_fprf to helper_compute_fprf_float64
Since helper_compute_fprf() works on float64 argument, rename it
to helper_compute_fprf_float64(). Also use a macro to generate
helper_compute_fprf_float64() so that float128 version of the same
helper can be introduced easily later.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
5dc22bf581 target-ppc: Replace isden by float64_is_zero_or_denormal
Replace isden() by float64_is_zero_or_denormal() so that code in
helper_compute_fprf() can be reused to work with float128 argument.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:14 +11:00
Bharata B Rao
1383602e0d target-ppc: Use float64 arg in helper_compute_fprf()
Use float64 argument instead of unit64_t in helper_compute_fprf()
This allows code in helper_compute_fprf() to be reused later to
work with float128 argument too.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Hervé Poussineau
34b9b5575b prep: add IBM RS/6000 7020 (40p) machine emulation
Machine supports both Open Hack'Ware and OpenBIOS.
Open Hack'Ware is the default because OpenBIOS is currently unable to boot
PReP boot partitions or PReP kernels.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
[dwg: Correct compile failure with KVM located by Thomas Huth]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Hervé Poussineau
79623312c6 prep: add IBM RS/6000 7020 (40p) memory controller
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
[dwg: Added CONFIG_RS6000_MC to ppc64 or it breaks testcases]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Hervé Poussineau
d2f8415226 prep: add PReP System I/O
This device is a partial duplicate of System I/O device available in hw/ppc/prep.c
This new one doesn't have all the Motorola-specific registers.
The old one should be deprecated and removed with the 'prep' machine.

Partial documentation available at
ftp://ftp.software.ibm.com/rs6000/technology/spec/srp1_1.exe
section 6.1.5 (I/O Device Mapping)

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
3398b7428b target-ppc: Add xxinsertw instruction
xxinsertw: VSX Vector Insert Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
8ad901e558 target-ppc: Add xxextractuw instruction
xxextractuw: VSX Vector Extract Unsigned Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
xiaoqiang zhao
0f358a0710 hw/ppc: QOM'ify spapr_vio.c
Drop the old and empty SysBus init

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
xiaoqiang zhao
09a7eb978f hw/ppc: QOM'ify ppce500_spin.c
Drop the old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
xiaoqiang zhao
d0c2b0d089 hw/ppc: QOM'ify e500.c
Drop the old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
xiaoqiang zhao
396781f627 hw/gpio: QOM'ify mpc8xxx.c
* Drop the old SysBus init function and use instance_init
* Change mpc8xxx_gpio_reset to a DeviceClass::reset function

Signed-off-by: xiaoqiang zhao <zxq_yx_007@163.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Laurent Vivier
2bf25e07bb qtest: add ivshmem-test for ppc64
The test has been converted to use libqos, we can
now use it on ppc64. We also make the test fail on
all other architectures.
As libqos on ppc64 is not able to manage hotplug
and IRQ/MSI, we disable this part in the test on ppc64.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
[dwg: Make test conditional on CONFIG_EVENTFD]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Laurent Vivier
d69487d573 qtest: convert ivshmem-test to use libqos
This will allow to use it with ppc64.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Laurent Vivier
b84541693b libqos: fix spapr qpci_map()
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Laurent Vivier
f38a0b2fd7 qtest: add display-vga-test to ppc64
Only enable for ppc64 in the Makefile, but added
code in the file to check cirrus card only on architectures
supporting it (alpha, mips, i386, x86_64).

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Laurent Vivier
2f8e4906ff qtest: add netfilter tests for ppc64
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
David Gibson
9d2179d6f9 ppc: Validate compatibility modes when setting
Current ppc_set_compat() will attempt to set any compatiblity mode
specified, regardless of whether it's available on the CPU.  The caller is
expected to make sure it is setting a possible mode, which is awkwward
because most of the information to make that decision is at the CPU level.

This begins to clean this up by introducing a ppc_check_compat() function
which will determine if a given compatiblity mode is supported on a CPU
(and also whether it lies within specified minimum and maximum compat
levels, which will be useful later).  It also contains an assertion that
the CPU has a "virtual hypervisor"[1], that is, that the guest isn't
permitted to execute hypervisor privilege code.  Without that, the guest
would own the PCR and so could override any mode set here.  Only machine
types which use a virtual hypervisor (i.e. 'pseries') should use
ppc_check_compat().

ppc_set_compat() is modified to validate the compatibility mode it is given
and fail if it's not available on this CPU.

[1] Or user-only mode, which also obviously doesn't allow access to the
hypervisor privileged PCR.  We don't use that now, but could in future.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-01-31 10:10:13 +11:00
David Gibson
12dbeb16d0 ppc: Rewrite ppc_get_compat_smt_threads()
To continue consolidation of compatibility mode information, this rewrites
the ppc_get_compat_smt_threads() function using the table of compatiblity
modes in target-ppc/compat.c.

It's not a direct replacement, the new ppc_compat_max_threads() function
has simpler semantics - it just returns the number of threads the cpu
model has, taking into account any compatiblity mode it is in.

This no longer takes into account kvmppc_smt_threads() as the previous
version did.  That check wasn't useful because we check in
ppc_cpu_realizefn() that CPUs aren't instantiated with more threads
than kvm allows (or if we didn't things will already be broken and
this won't make it any worse).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-01-31 10:10:13 +11:00
David Gibson
9d6f106552 ppc: Rewrite ppc_set_compat()
This rewrites the ppc_set_compat() function so that instead of open coding
the various compatibility modes, it reads the relevant data from a table.
This is a first step in consolidating the information on compatibility
modes scattered across the code into a single place.

It also makes one change to the logic.  The old code masked the bits
to be set in the PCR (Processor Compatibility Register) by which bits
are valid on the host CPU.  This made no sense, since it was done
regardless of whether our guest CPU was the same as the host CPU or
not.  Furthermore, the actual PCR bits are only relevant for TCG[1] -
KVM instead uses the compatibility mode we tell it in
kvmppc_set_compat().  When using TCG host cpu information usually
isn't even present.

While we're at it, we put the new implementation in a new file to make the
enormous translate_init.c a little smaller.

[1] Actually it doesn't even do anything in TCG, but it will if / when we
    get to implementing compatibility mode logic at that level.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-01-31 10:10:13 +11:00
David Gibson
fa325e6cbf pseries: Add pseries-2.9 machine type
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
2017-01-31 10:10:13 +11:00
Hervé Poussineau
5904bca84e prep: do not use global variable to access nvram
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Thomas Huth
b99260ebbb hw/ppc/spapr: Fix boot path of usb-host storage devices
When passing through an USB storage device to a pseries guest, it
is currently not possible to automatically boot from the device
if the "bootindex" property has been specified, too (e.g. when using
"-device nec-usb-xhci -device usb-host,hostbus=1,hostaddr=2,bootindex=0"
at the command line). The problem is that QEMU builds a device tree path
like "/pci@800000020000000/usb@0/usb-host@1" and passes it to SLOF
in the /chosen/qemu,boot-list property. SLOF, however, probes the
USB device, recognizes that it is a storage device and thus changes
its name to "storage", and additionally adds a child node for the
SCSI LUN, so the correct boot path in SLOF is something like
"/pci@800000020000000/usb@0/storage@1/disk@101000000000000" instead.
So when we detect an USB mass storage device with SCSI interface,
we've got to adjust the firmware boot-device path properly that
SLOF can automatically boot from the device.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1354177
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
e122090df3 target-ppc: implement stxvll instructions
stxvll: Store VSX Vector Left-justified with Length

Vector (8-bit elements) in BE/LE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Storing 14 bytes would result in following Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
681c247833 target-ppc: implement stxvl instruction
stxvl: Store VSX Vector with Length

Vector (8-bit elements) in BE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Vector (8-bit elements) in LE:
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|00|00|“T”|“S”|“E”|“T”|“ ”|“a”|“ ”|“s”|“i”|“ ”|“s”|“i”|"h"|"T"|
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+

Storing 14 bytes would result in following Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
176e44e7eb target-ppc: implement lxvll instruction
lxvll: Load VSX Vector Left-justified with Length

Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Loading 14 bytes to vector (8-bit elements) in BE/LE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
6914bc4fb5 target-ppc: implement lxvl instruction
lxvl: Load VSX Vector with Length

Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Loading 14 bytes results in:

Vector (8-bit elements) in BE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Vector (8-bit elements) in LE:
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|00|00|“T”|“S”|“E”|“T”|“ ”|“a”|“ ”|“s”|“i”|“ ”|“s”|“i”|"h"|"T"|
+--+--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Bharata B Rao
234068abfb target-ppc: Add xxperm and xxpermr instructions
xxperm:  VSX Vector Permute
xxpermr: VSX Vector Permute Right-indexed

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
014ed3bb20 target-ppc: implement xscpsgnqp instruction
xscpsgnqp: VSX Scalar Copy Sign Quad-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
8497d7fc69 target-ppc: implement xsnegqp instruction
xsnegqp: VSX Scalar Negate Quad-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Jose Ricardo Ziviani
071663dfc3 target-ppc: Implement bcd_is_valid function
A function to check if all digits of a given BCD number is valid is
here presented because more instructions will need to reuse the
same code.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
David Gibson
3259dbd9df target-ppc: implement xsabsqp/xsnabsqp instruction
xsabsqp:  VSX Scalar Absolute Quad-Precision
xsnabsqp: VSX Scalar Negative Absolute Quad-Precision

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
cdee0e72d0 target-ppc: implement stop instruction
Use the nap code.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nikunj A Dadhania
f9f2ed5ae0 target-ppc: move ppc_vsr_t to common header
The structure and corresponding defines and functions need to be used
outside of fpu_helper.c as well.

Add u8, u16, u32 and Int128 to the structure.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Nicholas Piggin
1c7ad77e56 ppc/spapr: implement H_SIGNAL_SYS_RESET
The H_SIGNAL_SYS_RESET hcall allows a guest CPU to raise a system reset
exception on CPUs within the same guest -- all CPUs, all-but-self, or a
specific CPU (including self).

This has not made its way to a PAPR release yet, but we have an hcall
number assigned.

  H_SIGNAL_SYS_RESET = 0x380

  Syntax:
    hcall(uint64 H_SIGNAL_SYS_RESET, int64 target);

  Generate a system reset NMI on the threads indicated by target.

  Values for target:
    -1 = target all online threads including the caller
    -2 = target all online threads except for the caller
    All other negative values: reserved
    Positive values: The thread to be targeted, obtained from the value
    of the "ibm,ppc-interrupt-server#s" property of the CPU in the OF
    device tree.

  Semantics:
    - Invalid target: return H_Parameter.
    - Otherwise: Generate a system reset NMI on target thread(s),
      return H_Success.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
David Gibson
d6e166c082 ppc: Rename cpu_version to compat_pvr
The 'cpu_version' field in PowerPCCPU is badly named.  It's named after the
'cpu-version' device tree property where it is advertised, but that meaning
may not be obvious in most places it appears.

Worse, it doesn't even really correspond to that device tree property.  The
property contains either the processor's PVR, or, if the CPU is running in
a compatibility mode, a special "logical PVR" representing which mode.

Rename the cpu_version field, and a number of related variables to
compat_pvr to make this clearer.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2017-01-31 10:10:13 +11:00
David Gibson
1d1be34d26 ppc: Clean up and QOMify hypercall emulation
The pseries machine type is a bit unusual in that it runs a paravirtualized
guest.  The guest expects to interact with a hypervisor, and qemu
emulates the functions of that hypervisor directly, rather than executing
hypervisor code within the emulated system.

To implement this in TCG, we need to intercept hypercall instructions and
direct them to the machine's hypercall handlers, rather than attempting to
perform a privilege change within TCG.  This is controlled by a global
hook - cpu_ppc_hypercall.

This cleanup makes the handling a little cleaner and more extensible than
a single global variable.  Instead, each CPU to have hypercalls intercepted
has a pointer set to a QOM object implementing a new virtual hypervisor
interface.  A method in that interface is called by TCG when it sees a
hypercall instruction.  It's possible we may want to add other methods in
future.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-01-31 10:10:13 +11:00
David Gibson
5b120785e7 pseries: Make cpu_update during CAS unconditional
spapr_h_cas_compose_response() includes a cpu_update parameter which
controls whether it includes updated information on the CPUs in the device
tree fragment returned from the ibm,client-architecture-support (CAS) call.

Providing the updated information is essential when CAS has negotiated
compatibility options which require different cpu information to be
presented to the guest.  However, it should be safe to provide in other
cases (it will just override the existing data in the device tree with
identical data).  This simplifies the code by removing the parameter and
always providing the cpu update information.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-01-31 10:10:13 +11:00
David Gibson
0c86d0fd92 pseries: Always use core objects for CPU construction
Currently the pseries machine has two paths for constructing CPUs.  On
newer machine type versions, which support cpu hotplug, it constructs
cpu core objects, which in turn construct CPU threads.  For older machine
versions it individually constructs the CPU threads.

This division is going to make some future changes to the cpu construction
harder, so this patch unifies them.  Now cpu core objects are always
created.  This requires some updates to allow core objects to be created
without a full complement of threads (since older versions allowed a
number of cpus not a multiple of the threads-per-core).  Likewise it needs
some changes to the cpu core hot/cold plug path so as not to choke on the
old machine types without hotplug support.

For good measure, we move the cpu construction to its own subfunction,
spapr_init_cpus().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-01-31 10:10:13 +11:00
Avinesh Kumar
60caf2216b target-ppc: add vextu[bhw][lr]x instructions
vextublx: Vector Extract Unsigned Byte Left
vextuhlx: Vector Extract Unsigned Halfword Left
vextuwlx: Vector Extract Unsigned Word Left
vextubrx: Vector Extract Unsigned Byte Right-Indexed VX-form
vextuhrx: Vector Extract Unsigned  Halfword Right-Indexed VX-form
vextuwrx: Vector Extract Unsigned Word Right-Indexed VX-form

Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Hariharan T.S. <hari@linux.vnet.ibm.com>
[ implement using int128_rshift ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Jose Ricardo Ziviani
466a3f9ca3 target-ppc: Implement bcdsetsgn. instruction
bcdsetsgn.: Decimal set sign. This instruction copies the register
value to the result register but adjust the signal according to
the preferred sign value.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Jose Ricardo Ziviani
c3025c3b0a target-ppc: Implement bcdcpsgn. instruction
bcdcpsgn.: Decimal copy sign. Given two registers vra and vrb, it
copies the vra value with vrb sign to the result register vrt.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:13 +11:00
Jose Ricardo Ziviani
c85bc7dd90 target-ppc: Implement bcdctsq. instruction
bcdctsq.: Decimal convert to signed quadword. It is possible to
convert packed decimal values to signed quadwords.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Jose Ricardo Ziviani
a406c058e7 target-ppc: Implement bcdcfsq. instruction
bcdcfsq.: Decimal convert from signed quadword. It is not possible
to convert values less than -10^31-1 or greater than 10^31-1 to be
represented in packed decimal format.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
[dwg: Corrected constant which should be 10^16-1 but was 10^17-1]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Nikunj A Dadhania
d59ba58380 target-ppc: implement lxv/lxvx and stxv/stxvx
lxv:  Load VSX Vector
lxvx: Load VSX Vector Indexed

    Little/Big-endian Storage
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    |F0|F1|F2|F3|F4|F5|F6|F7|E0|E1|E2|E3|E4|E5|E6|E7|
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

    Vector load results:
    BE:
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    |F0|F1|F2|F3|F4|F5|F6|F7|E0|E1|E2|E3|E4|E5|E6|E7|
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

    LE:
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    |E7|E6|E5|E4|E3|E2|E1|E0|F7|F6|F5|F4|F3|F2|F1|F0|
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

stxv: Store VSX Vector
stxvx: Store VSX Vector Indexed

    Vector (8-bit elements) in BE:
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    |F0|F1|F2|F3|F4|F5|F6|F7|E0|E1|E2|E3|E4|E5|E6|E7|
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

    Vector (8-bit elements) in LE:
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    |E7|E6|E5|E4|E3|E2|E1|E0|F7|F6|F5|F4|F3|F2|F1|F0|
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

    Store results in following:
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    |F0|F1|F2|F3|F4|F5|F6|F7|E0|E1|E2|E3|E4|E5|E6|E7|
    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Nikunj A Dadhania
e3001664f1 target-ppc: implement stxsd and stxssp
stxsd:  Store VSX Scalar Dword
stxssp: Store VSX Scalar SP

Moreover, DQ-Form/DS-FORM instructions shares the same primary
opcode(0x3D). For DQ-FORM bits 29:31 are used, for DS-FORM bits 30:31
are used. Common routine to decode primary opcode(0x3D) -
ds-form/dq-form instructions is required.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Nikunj A Dadhania
5cb091a4fd target-ppc: implement lxsd and lxssp instructions
lxsd: Load VSX Scalar Dword
lxssp: Load VSX Scalar Single

Moreover, DS-Form instructions shares the same primary opcode, bits
30:31 are used to decode the instruction. Use a common routine to decode
primary opcode(0x39) - ds-form instructions and branch-out depending on
bits 30:31.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Bharata B Rao
be0a4faf35 target-ppc: Add xscmpoqp and xscmpuqp instructions
xscmpoqp - VSX Scalar Compare Ordered Quad-Precision
xscmpuqp - VSX Scalar Compare Unordered Quad-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Bharata B Rao
3a20d11d45 target-ppc: Add xscmpexp[dp,qp] instructions
xscmpexpdp: VSX Scalar Compare Exponents Double-Precision
xscmpexpqp: VSX Scalar Compare Exponents Quad-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Bharata B Rao
855f7a657e target-ppc: Fix xscmpodp and xscmpudp instructions
- xscmpodp & xscmpudp are missing flags reset.
- In xscmpodp, VXCC should be set only if VE is 0 for signalling NaN case
  and VXCC should be set by explicitly checking for quiet NaN case.
- Comparison is being done only if the operands are not NaNs. However as
  per ISA, it should be done even when operands are NaNs.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Nikunj A Dadhania
efa7319619 target-ppc: rename CRF_* defines as CRF_*_BIT
Add _BIT to CRF_[GT,LT,EQ_SO] and introduce CRF_[GT,LT,EQ,SO] for usage
without shifts in the code. This would simplify the code.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Bharata B Rao
985e3023f7 target-ppc: Consolidate instruction decode helpers
Move instruction decode helpers to target-ppc/internal.h so that some
of these can be used from outside of translate.c. This movement also
helps to get rid of some duplicate helpers from target-ppc/fpu_helper.c.

Suggested-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Stefan Weil
bd97a59eec disas/ppc: Fix indefinite articles in comments
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-01-31 10:10:12 +11:00
Anthony PERARD
6d06220ad9 MAINTAINERS: Update xen-devel mailing list address
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
2017-01-27 15:23:30 -08:00
Paul Durrant
ae4d2eb273 xen-platform: add missing disk unplug option
The Xen HVM unplug protocol [1] specifies a mechanism to allow guests to
request unplug of 'aux' disks (which is stated to mean all IDE disks,
except the primary master). This patch adds support for that unplug request.

NOTE: The semantics of what happens if unplug of all disks and 'aux' disks
      is simultaneously requests is not clear. The patch makes that
      assumption that an 'all' request overrides an 'aux' request.

[1] http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.markdown

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
----
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: John Snow <jsnow@redhat.com>
2017-01-27 15:23:29 -08:00
Paul Durrant
090fa1c8c8 xen-platform: add support for unplugging NVMe disks...
...not just IDE and SCSI.

This patch allows the Xen tool-stack to fully support of NVMe as an
emulated disk type. See [1] for the relevant tool-stack patch discussion.

[1] https://lists.xen.org/archives/html/xen-devel/2017-01/msg01225.html

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-01-27 15:23:29 -08:00
Paul Durrant
3d89e3f7e8 xen-platform: re-structure unplug_disks
The current code is poorly structured and potentially leads to multiple
config space reads when one is sufficient. Also the UNPLUG_ALL_IDE_DISKS
flag is mis-named since it also results in SCSI disks being unplugged.

This patch renames the flag and re-structures the code to be more
efficient, and readable.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
2017-01-27 15:23:28 -08:00
848 changed files with 38669 additions and 16647 deletions

23
.gitignore vendored
View File

@@ -6,18 +6,12 @@
/config.status
/config-temp
/trace-events-all
/trace/generated-tracers.h
/trace/generated-tracers.c
/trace/generated-tracers-dtrace.h
/trace/generated-tracers.dtrace
/trace/generated-events.h
/trace/generated-events.c
/trace/generated-helpers-wrappers.h
/trace/generated-helpers.h
/trace/generated-helpers.c
/trace/generated-tcg-tracers.h
/trace/generated-ust-provider.h
/trace/generated-ust.c
/ui/shader/texture-blit-frag.h
/ui/shader/texture-blit-vert.h
*-timestamp
@@ -113,6 +107,7 @@ docs/qemu-ga-ref.info*
docs/qemu-qmp-ref.info*
/qemu-ga-qapi.texi
/qemu-qapi.texi
/version.texi
*.tps
.stgit-*
cscope.*
@@ -120,3 +115,19 @@ tags
TAGS
docker-src.*
*~
trace.h
trace.c
trace-ust.h
trace-ust.h
trace-dtrace.h
trace-dtrace.dtrace
trace-root.h
trace-root.c
trace-ust-root.h
trace-ust-root.h
trace-ust-all.h
trace-ust-all.c
trace-dtrace-root.h
trace-dtrace-root.dtrace
trace-ust-all.h
trace-ust-all.c

21
.shippable.yml Normal file
View File

@@ -0,0 +1,21 @@
language: c
env:
matrix:
- IMAGE=debian-armhf-cross
TARGET_LIST=arm-softmmu,arm-linux-user
- IMAGE=debian-arm64-cross
TARGET_LIST=aarch64-softmmu,aarch64-linux-user
- IMAGE=debian-s390x-cross
TARGET_LIST=s390x-softmmu,s390x-linux-user
build:
pre_ci:
- make docker-image-${IMAGE}
pre_ci_boot:
image_name: qemu
image_tag: ${IMAGE}
pull: false
options: "-e HOME=/root"
ci:
- unset CC
- ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
- make -j2

View File

@@ -92,8 +92,8 @@ matrix:
- env: CONFIG=""
os: osx
compiler: clang
# Plain Trusty Build
- env: CONFIG=""
# Plain Trusty System Build
- env: CONFIG="--disable-linux-user"
sudo: required
addons:
dist: trusty
@@ -103,16 +103,45 @@ matrix:
- sudo apt-get build-dep -qq qemu
- wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
- git submodule update --init --recursive
# Trusty build with latest stable clang
- env: CONFIG=""
# Plain Trusty Linux User Build
- env: CONFIG="--disable-system"
sudo: required
addons:
dist: trusty
compiler: gcc
before_install:
- sudo apt-get update -qq
- sudo apt-get build-dep -qq qemu
- wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
- git submodule update --init --recursive
# Trusty System build with latest stable clang
- sudo: required
addons:
dist: trusty
language: generic
compiler: none
env:
- COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
- CONFIG="--cc=clang-3.9 --cxx=clang++-3.9"
- CONFIG="--disable-linux-user --cc=clang-3.9 --cxx=clang++-3.9"
before_install:
- wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
- sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
- sudo apt-get update -qq
- sudo apt-get install -qq -y clang-3.9
- sudo apt-get build-dep -qq qemu
- wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
- git submodule update --init --recursive
before_script:
- ./configure ${CONFIG} || cat config.log
# Trusty Linux User build with latest stable clang
- sudo: required
addons:
dist: trusty
language: generic
compiler: none
env:
- COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
- CONFIG="--disable-system --cc=clang-3.9 --cxx=clang++-3.9"
before_install:
- wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
- sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'

View File

@@ -116,3 +116,10 @@ if (a == 1) {
Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
Besides, good compilers already warn users when '==' is mis-typed as '=',
even when the constant is on the right.
7. Comment style
We use traditional C-style /* */ comments and avoid // comments.
Rationale: The // form is valid in C99, so this is purely a matter of
consistency of style. The checkpatch script will warn you about this.

View File

@@ -323,7 +323,7 @@ Guest CPU Cores (Xen):
X86
M: Stefano Stabellini <sstabellini@kernel.org>
M: Anthony Perard <anthony.perard@citrix.com>
L: xen-devel@lists.xensource.com
L: xen-devel@lists.xenproject.org
S: Supported
F: xen-*
F: */xen*
@@ -561,20 +561,19 @@ F: hw/lm32/milkymist.c
M68K Machines
-------------
an5206
S: Orphan
M: Thomas Huth <huth@tuxfamily.org>
S: Odd Fixes
F: hw/m68k/an5206.c
F: hw/m68k/mcf5206.c
dummy_m68k
S: Orphan
F: hw/m68k/dummy_m68k.c
mcf5208
S: Orphan
M: Thomas Huth <huth@tuxfamily.org>
S: Odd Fixes
F: hw/m68k/mcf5208.c
F: hw/m68k/mcf_intc.c
F: hw/char/mcf_uart.c
F: hw/net/mcf_fec.c
F: include/hw/m68k/mcf*.h
MicroBlaze Machines
-------------------
@@ -671,10 +670,13 @@ F: hw/misc/macio/
F: hw/intc/heathrow_pic.c
PReP
M: Hervé Poussineau <hpoussin@reactos.org>
L: qemu-devel@nongnu.org
L: qemu-ppc@nongnu.org
S: Odd Fixes
S: Maintained
F: hw/ppc/prep.c
F: hw/ppc/prep_systemio.c
F: hw/ppc/rs6000_mc.c
F: hw/pci-host/prep.[hc]
F: hw/isa/pc87312.[hc]
F: pc-bios/ppc_rom.bin
@@ -1031,7 +1033,7 @@ F: hw/input/virtio-input*.c
F: include/hw/virtio/virtio-input.h
virtio-serial
M: Amit Shah <amit.shah@redhat.com>
M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
@@ -1040,7 +1042,7 @@ F: tests/virtio-console-test.c
F: tests/virtio-serial-test.c
virtio-rng
M: Amit Shah <amit.shah@redhat.com>
M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/virtio/virtio-rng.c
F: include/hw/virtio/virtio-rng.h
@@ -1194,8 +1196,9 @@ T: git git://github.com/jnsnow/qemu.git bitmaps
Character device backends
M: Paolo Bonzini <pbonzini@redhat.com>
M: Marc-André Lureau <marcandre.lureau@redhat.com>
S: Maintained
F: qemu-char.c
F: chardev/
F: backends/msmouse.c
F: backends/testdev.c
@@ -1427,7 +1430,6 @@ F: scripts/checkpatch.pl
Migration
M: Juan Quintela <quintela@redhat.com>
M: Amit Shah <amit.shah@redhat.com>
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
S: Maintained
F: include/migration/
@@ -1798,9 +1800,14 @@ F: docs/block-replication.txt
Build and test automation
-------------------------
M: Alex Bennée <alex.bennee@linaro.org>
M: Fam Zheng <famz@redhat.com>
L: qemu-devel@nongnu.org
S: Supported
S: Maintained
F: .travis.yml
F: .shippable.yml
F: tests/docker/
W: https://travis-ci.org/qemu/qemu
W: http://patchew.org/QEMU/
Documentation
-------------
@@ -1809,9 +1816,3 @@ M: Daniel P. Berrange <berrange@redhat.com>
S: Odd Fixes
F: docs/build-system.txt
Docker testing
--------------
Docker based testing framework and cases
M: Fam Zheng <famz@redhat.com>
S: Maintained
F: tests/docker/

183
Makefile
View File

@@ -56,25 +56,136 @@ GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
GENERATED_HEADERS += qmp-introspect.h
GENERATED_SOURCES += qmp-introspect.c
GENERATED_HEADERS += trace/generated-tracers.h
ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace)
GENERATED_HEADERS += trace/generated-tracers-dtrace.h
endif
GENERATED_SOURCES += trace/generated-tracers.c
GENERATED_HEADERS += trace/generated-tcg-tracers.h
GENERATED_HEADERS += trace/generated-helpers-wrappers.h
GENERATED_HEADERS += trace/generated-helpers.h
GENERATED_SOURCES += trace/generated-helpers.c
ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
GENERATED_HEADERS += trace/generated-ust-provider.h
GENERATED_SOURCES += trace/generated-ust.c
ifdef CONFIG_TRACE_UST
GENERATED_HEADERS += trace-ust-all.h
GENERATED_SOURCES += trace-ust-all.c
endif
GENERATED_HEADERS += module_block.h
TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
TRACE_DTRACE =
ifdef CONFIG_TRACE_DTRACE
TRACE_HEADERS += trace-dtrace-root.h $(trace-events-subdirs:%=%/trace-dtrace.h)
TRACE_DTRACE += trace-dtrace-root.dtrace $(trace-events-subdirs:%=%/trace-dtrace.dtrace)
endif
ifdef CONFIG_TRACE_UST
TRACE_HEADERS += trace-ust-root.h $(trace-events-subdirs:%=%/trace-ust.h)
endif
GENERATED_HEADERS += $(TRACE_HEADERS)
GENERATED_SOURCES += $(TRACE_SOURCES)
trace-group-name = $(shell dirname $1 | sed -e 's/[^a-zA-Z0-9]/_/g')
%/trace.h: %/trace.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
%/trace.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=$(call trace-group-name,$@) \
--format=h \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
%/trace.c: %/trace.c-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
%/trace.c-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=$(call trace-group-name,$@) \
--format=c \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
%/trace-ust.h: %/trace-ust.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
%/trace-ust.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=$(call trace-group-name,$@) \
--format=ust-events-h \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
%/trace-dtrace.dtrace: %/trace-dtrace.dtrace-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
%/trace-dtrace.dtrace-timestamp: $(SRC_PATH)/%/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=$(call trace-group-name,$@) \
--format=d \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
%/trace-dtrace.h: %/trace-dtrace.dtrace $(tracetool-y)
$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
%/trace-dtrace.o: %/trace-dtrace.dtrace $(tracetool-y)
trace-root.h: trace-root.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
trace-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=root \
--format=h \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
trace-root.c: trace-root.c-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
trace-root.c-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=root \
--format=c \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
trace-ust-root.h: trace-ust-root.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
trace-ust-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=root \
--format=ust-events-h \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
trace-ust-all.h: trace-ust-all.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
trace-ust-all.h-timestamp: $(trace-events-files) $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=all \
--format=ust-events-h \
--backends=$(TRACE_BACKENDS) \
$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
trace-ust-all.c: trace-ust-all.c-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
trace-ust-all.c-timestamp: $(trace-events-files) $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=all \
--format=ust-events-c \
--backends=$(TRACE_BACKENDS) \
$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
trace-dtrace-root.dtrace: trace-dtrace-root.dtrace-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
trace-dtrace-root.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
$(call quiet-command,$(TRACETOOL) \
--group=root \
--format=d \
--backends=$(TRACE_BACKENDS) \
$< > $@,"GEN","$(@:%-timestamp=%)")
trace-dtrace-root.h: trace-dtrace-root.dtrace
$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
trace-dtrace-root.o: trace-dtrace-root.dtrace
# Don't try to regenerate Makefile or configure
# We don't generate any of them
Makefile: ;
@@ -147,6 +258,7 @@ endif
dummy := $(call unnest-vars,, \
stub-obj-y \
chardev-obj-y \
util-obj-y \
qga-obj-y \
ivshmem-client-obj-y \
@@ -160,7 +272,8 @@ dummy := $(call unnest-vars,, \
qom-obj-y \
io-obj-y \
common-obj-y \
common-obj-m)
common-obj-m \
trace-obj-y)
ifneq ($(wildcard config-host.mak),)
include $(SRC_PATH)/tests/Makefile.include
@@ -186,7 +299,11 @@ qemu-version.h: FORCE
printf '""\n'; \
fi; \
fi) > $@.tmp)
$(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)
$(call quiet-command, if ! cmp -s $@ $@.tmp; then \
mv $@.tmp $@; \
else \
rm $@.tmp; \
fi)
config-host.h: config-host.h-timestamp
config-host.h-timestamp: config-host.mak
@@ -223,7 +340,8 @@ subdir-dtc:dtc/libfdt dtc/tests
dtc/%:
mkdir -p $@
$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) $(trace-obj-y)
ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
# Only keep -O and -g cflags
@@ -247,15 +365,17 @@ libqemuutil.a: $(util-obj-y)
######################################################################
COMMON_LDADDS = $(trace-obj-y) libqemuutil.a libqemustub.a
qemu-img.o: qemu-img-cmds.h
qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
@@ -320,7 +440,7 @@ $(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
$(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
qemu-ga$(EXESUF): $(qga-obj-y) libqemuutil.a libqemustub.a
qemu-ga$(EXESUF): $(qga-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
ifdef QEMU_GA_MSI_ENABLED
@@ -345,9 +465,9 @@ ifneq ($(EXESUF),)
qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
endif
ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
@@ -396,7 +516,7 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
rm -f qemu-ga-qapi.texi qemu-qapi.texi
rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -473,7 +593,7 @@ endif
endif
install: all $(if $(BUILD_DOCS),install-doc) \
install: all $(if $(BUILD_DOCS),install-doc) $(BUILD_DIR)/trace-events-all \
install-datadir install-localstatedir
ifneq ($(TOOLS),)
$(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
@@ -543,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
# documentation
MAKEINFO=makeinfo
MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
MAKEINFOFLAGS=--no-split --number-sections
TEXIFLAG=$(if $(V),,--quiet)
%.html: %.texi
version.texi: $(SRC_PATH)/VERSION
$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
%.html: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--html $< -o $@,"GEN","$@")
%.info: %.texi
%.info: %.texi version.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
%.txt: %.texi
%.txt: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--plaintext $< -o $@,"GEN","$@")
%.pdf: %.texi
%.pdf: %.texi version.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")
qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
@@ -664,6 +787,10 @@ ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fa
Makefile: $(GENERATED_HEADERS)
endif
.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \
$(TRACE_DTRACE) $(TRACE_DTRACE:%=%-timestamp)
# Include automatically generated dependency files
# Dependencies in Makefile.objs files come from our recursive subdir rules
-include $(wildcard *.d tests/*.d)

View File

@@ -4,15 +4,13 @@ stub-obj-y = stubs/ crypto/
util-obj-y = util/ qobject/ qapi/
util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o
chardev-obj-y = chardev/
#######################################################################
# block-obj-y is code used by both qemu system emulation and qemu-img
block-obj-y = async.o thread-pool.o
block-obj-y += nbd/
block-obj-y += block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
block-obj-$(CONFIG_REPLICATION) += replication.o
@@ -51,8 +49,7 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
common-obj-$(CONFIG_LINUX) += fsdev/
common-obj-y += migration/
common-obj-y += qemu-char.o #aio.o
common-obj-y += page_cache.o
common-obj-y += page_cache.o #aio.o
common-obj-$(CONFIG_SPICE) += spice-qemu-char.o
@@ -118,47 +115,59 @@ ivshmem-server-obj-y = contrib/ivshmem-server/
libvhost-user-obj-y = contrib/libvhost-user/
######################################################################
trace-events-y = trace-events
trace-events-y += util/trace-events
trace-events-y += crypto/trace-events
trace-events-y += io/trace-events
trace-events-y += migration/trace-events
trace-events-y += block/trace-events
trace-events-y += hw/block/trace-events
trace-events-y += hw/char/trace-events
trace-events-y += hw/intc/trace-events
trace-events-y += hw/net/trace-events
trace-events-y += hw/virtio/trace-events
trace-events-y += hw/audio/trace-events
trace-events-y += hw/misc/trace-events
trace-events-y += hw/usb/trace-events
trace-events-y += hw/scsi/trace-events
trace-events-y += hw/nvram/trace-events
trace-events-y += hw/display/trace-events
trace-events-y += hw/input/trace-events
trace-events-y += hw/timer/trace-events
trace-events-y += hw/dma/trace-events
trace-events-y += hw/sparc/trace-events
trace-events-y += hw/sd/trace-events
trace-events-y += hw/isa/trace-events
trace-events-y += hw/mem/trace-events
trace-events-y += hw/i386/trace-events
trace-events-y += hw/9pfs/trace-events
trace-events-y += hw/ppc/trace-events
trace-events-y += hw/pci/trace-events
trace-events-y += hw/s390x/trace-events
trace-events-y += hw/vfio/trace-events
trace-events-y += hw/acpi/trace-events
trace-events-y += hw/arm/trace-events
trace-events-y += hw/alpha/trace-events
trace-events-y += ui/trace-events
trace-events-y += audio/trace-events
trace-events-y += net/trace-events
trace-events-y += target/arm/trace-events
trace-events-y += target/i386/trace-events
trace-events-y += target/sparc/trace-events
trace-events-y += target/s390x/trace-events
trace-events-y += target/ppc/trace-events
trace-events-y += qom/trace-events
trace-events-y += linux-user/trace-events
trace-events-y += qapi/trace-events
trace-events-subdirs =
trace-events-subdirs += util
trace-events-subdirs += crypto
trace-events-subdirs += io
trace-events-subdirs += migration
trace-events-subdirs += block
trace-events-subdirs += backends
trace-events-subdirs += hw/block
trace-events-subdirs += hw/block/dataplane
trace-events-subdirs += hw/char
trace-events-subdirs += hw/intc
trace-events-subdirs += hw/net
trace-events-subdirs += hw/virtio
trace-events-subdirs += hw/audio
trace-events-subdirs += hw/misc
trace-events-subdirs += hw/usb
trace-events-subdirs += hw/scsi
trace-events-subdirs += hw/nvram
trace-events-subdirs += hw/display
trace-events-subdirs += hw/input
trace-events-subdirs += hw/timer
trace-events-subdirs += hw/dma
trace-events-subdirs += hw/sparc
trace-events-subdirs += hw/sd
trace-events-subdirs += hw/isa
trace-events-subdirs += hw/mem
trace-events-subdirs += hw/i386
trace-events-subdirs += hw/i386/xen
trace-events-subdirs += hw/9pfs
trace-events-subdirs += hw/ppc
trace-events-subdirs += hw/pci
trace-events-subdirs += hw/s390x
trace-events-subdirs += hw/vfio
trace-events-subdirs += hw/acpi
trace-events-subdirs += hw/arm
trace-events-subdirs += hw/alpha
trace-events-subdirs += hw/xen
trace-events-subdirs += ui
trace-events-subdirs += audio
trace-events-subdirs += net
trace-events-subdirs += target/arm
trace-events-subdirs += target/i386
trace-events-subdirs += target/sparc
trace-events-subdirs += target/s390x
trace-events-subdirs += target/ppc
trace-events-subdirs += qom
trace-events-subdirs += linux-user
trace-events-subdirs += qapi
trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
trace-obj-y = trace-root.o
trace-obj-y += $(trace-events-subdirs:%=%/trace.o)
trace-obj-$(CONFIG_TRACE_UST) += trace-ust-all.o
trace-obj-$(CONFIG_TRACE_DTRACE) += trace-dtrace-root.o
trace-obj-$(CONFIG_TRACE_DTRACE) += $(trace-events-subdirs:%=%/trace-dtrace.o)

View File

@@ -50,6 +50,7 @@ endif
$(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all
$(call quiet-command,$(TRACETOOL) \
--group=all \
--format=stap \
--backends=$(TRACE_BACKENDS) \
--binary=$(bindir)/$(QEMU_PROG) \
@@ -59,6 +60,7 @@ $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all
$(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all
$(call quiet-command,$(TRACETOOL) \
--group=all \
--format=stap \
--backends=$(TRACE_BACKENDS) \
--binary=$(realpath .)/$(QEMU_PROG) \
@@ -68,6 +70,7 @@ $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all
$(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
$(call quiet-command,$(TRACETOOL) \
--group=all \
--format=simpletrace-stap \
--backends=$(TRACE_BACKENDS) \
--probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
@@ -172,31 +175,36 @@ all-obj-y := $(obj-y)
target-obj-y :=
block-obj-y :=
common-obj-y :=
chardev-obj-y :=
include $(SRC_PATH)/Makefile.objs
dummy := $(call unnest-vars,,target-obj-y)
target-obj-y-save := $(target-obj-y)
dummy := $(call unnest-vars,.., \
block-obj-y \
block-obj-m \
chardev-obj-y \
crypto-obj-y \
crypto-aes-obj-y \
qom-obj-y \
io-obj-y \
common-obj-y \
common-obj-m)
common-obj-m \
trace-obj-y)
target-obj-y := $(target-obj-y-save)
all-obj-y += $(common-obj-y)
all-obj-y += $(target-obj-y)
all-obj-y += $(qom-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y) $(chardev-obj-y)
all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)
$(QEMU_PROG_BUILD): config-devices.mak
COMMON_LDADDS = $(trace-obj-y) ../libqemuutil.a ../libqemustub.a
# build either PROG or PROGW
$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
$(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
$(call LINK, $(filter-out %.mak, $^))
ifdef CONFIG_DARWIN
$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"REZ","$(TARGET_DIR)$@")

View File

@@ -28,6 +28,7 @@
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "qemu/cutils.h"
#include "sysemu/replay.h"
#define AUDIO_CAP "audio"
#include "audio_int.h"
@@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void)
static void audio_reset_timer (AudioState *s)
{
if (audio_is_timer_needed ()) {
timer_mod (s->ts,
timer_mod_anticipate_ns(s->ts,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
}
else {
@@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s)
prev_rpos = hw->rpos;
played = hw->pcm_ops->run_out (hw, live);
replay_audio_out(&played);
if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
hw->rpos, hw->samples, played);
@@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s)
while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
SWVoiceIn *sw;
int captured, min;
int captured = 0, min;
captured = hw->pcm_ops->run_in (hw);
if (replay_mode != REPLAY_MODE_PLAY) {
captured = hw->pcm_ops->run_in(hw);
}
replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
min = audio_pcm_hw_find_min_in (hw);
hw->total_samples_captured += captured - min;

View File

@@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
bool audio_is_cleaning_up(void);
void audio_cleanup(void);
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right);
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right);
#endif /* QEMU_AUDIO_H */

View File

@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "audio.h"
#define AUDIO_CAP "mixeng"
@@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = {
}
};
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
*left = sample->l;
*right = sample->r;
#endif
}
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
sample->l = left;
sample->r = right;
#endif
}
/*
* August 21, 1998
* Copyright 1998 Fabrice Bellard.

View File

@@ -38,10 +38,14 @@
#define AUDIO_CAP "sdl"
#include "audio_int.h"
#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
typedef struct SDLVoiceOut {
HWVoiceOut hw;
int live;
#if USE_SEMAPHORE
int rpos;
#endif
int decr;
} SDLVoiceOut;
@@ -53,8 +57,10 @@ static struct {
static struct SDLAudioState {
int exit;
#if USE_SEMAPHORE
SDL_mutex *mutex;
SDL_sem *sem;
#endif
int initialized;
bool driver_created;
} glob_sdl;
@@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)
static int sdl_lock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_LockMutex (s->mutex)) {
sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_LockAudio();
#endif
return 0;
}
static int sdl_unlock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_UnlockMutex (s->mutex)) {
sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_UnlockAudio();
#endif
return 0;
}
static int sdl_post (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_SemPost (s->sem)) {
sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
return -1;
}
#endif
return 0;
}
#if USE_SEMAPHORE
static int sdl_wait (SDLAudioState *s, const char *forfn)
{
if (SDL_SemWait (s->sem)) {
@@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
}
return 0;
}
#endif
static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
{
@@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
int to_mix, decr;
/* dolog ("in callback samples=%d\n", samples); */
#if USE_SEMAPHORE
sdl_wait (s, "sdl_callback");
if (s->exit) {
return;
@@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
if (!sdl->live) {
goto again;
}
#else
if (s->exit || !sdl->live) {
break;
}
#endif
/* dolog ("in callback live=%d\n", live); */
to_mix = audio_MIN (samples, sdl->live);
@@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
/* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
hw->clip (buf, src, chunk);
#if USE_SEMAPHORE
sdl->rpos = (sdl->rpos + chunk) % hw->samples;
#else
hw->rpos = (hw->rpos + chunk) % hw->samples;
#endif
to_mix -= chunk;
buf += chunk << hw->info.shift;
}
@@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
sdl->live -= decr;
sdl->decr += decr;
#if USE_SEMAPHORE
again:
if (sdl_unlock (s, "sdl_callback")) {
return;
}
#endif
}
/* dolog ("done len=%d\n", len); */
#if (SDL_MAJOR_VERSION >= 2)
/* SDL2 does not clear the remaining buffer for us, so do it on our own */
if (samples) {
memset(buf, 0, samples << hw->info.shift);
}
#endif
}
static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
decr = audio_MIN (sdl->decr, live);
sdl->decr -= decr;
#if USE_SEMAPHORE
sdl->live = live - decr;
hw->rpos = sdl->rpos;
#else
sdl->live = live;
#endif
if (sdl->live > 0) {
sdl_unlock_and_post (s, "sdl_run_out");
@@ -405,6 +449,7 @@ static void *sdl_audio_init (void)
return NULL;
}
#if USE_SEMAPHORE
s->mutex = SDL_CreateMutex ();
if (!s->mutex) {
sdl_logerr ("Failed to create SDL mutex\n");
@@ -419,6 +464,7 @@ static void *sdl_audio_init (void)
SDL_QuitSubSystem (SDL_INIT_AUDIO);
return NULL;
}
#endif
s->driver_created = true;
return s;
@@ -428,8 +474,10 @@ static void sdl_audio_fini (void *opaque)
{
SDLAudioState *s = opaque;
sdl_close (s);
#if USE_SEMAPHORE
SDL_DestroySemaphore (s->sem);
SDL_DestroyMutex (s->mutex);
#endif
SDL_QuitSubSystem (SDL_INIT_AUDIO);
s->driver_created = false;
}

View File

@@ -1,7 +1,7 @@
common-obj-y += rng.o rng-egd.o
common-obj-$(CONFIG_POSIX) += rng-random.o
common-obj-y += msmouse.o testdev.o
common-obj-y += msmouse.o wctablet.o testdev.o
common-obj-$(CONFIG_BRLAPI) += baum.o
baum.o-cflags := $(SDL_CFLAGS)

View File

@@ -616,9 +616,9 @@ static void baum_chr_read(void *opaque)
}
}
static void baum_chr_free(Chardev *chr)
static void char_braille_finalize(Object *obj)
{
BaumChardev *baum = BAUM_CHARDEV(chr);
BaumChardev *baum = BAUM_CHARDEV(obj);
timer_free(baum->cellCount_timer);
if (baum->brlapi) {
@@ -659,23 +659,18 @@ static void char_braille_class_init(ObjectClass *oc, void *data)
cc->open = baum_chr_open;
cc->chr_write = baum_chr_write;
cc->chr_accept_input = baum_chr_accept_input;
cc->chr_free = baum_chr_free;
}
static const TypeInfo char_braille_type_info = {
.name = TYPE_CHARDEV_BRAILLE,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(BaumChardev),
.instance_finalize = char_braille_finalize,
.class_init = char_braille_class_init,
};
static void register_types(void)
{
static const CharDriver driver = {
.kind = CHARDEV_BACKEND_KIND_BRAILLE,
};
register_char_driver(&driver);
type_register_static(&char_braille_type_info);
}

View File

@@ -139,9 +139,9 @@ static int msmouse_chr_write(struct Chardev *s, const uint8_t *buf, int len)
return len;
}
static void msmouse_chr_free(struct Chardev *chr)
static void char_msmouse_finalize(Object *obj)
{
MouseChardev *mouse = MOUSE_CHARDEV(chr);
MouseChardev *mouse = MOUSE_CHARDEV(obj);
qemu_input_handler_unregister(mouse->hs);
}
@@ -172,23 +172,18 @@ static void char_msmouse_class_init(ObjectClass *oc, void *data)
cc->open = msmouse_chr_open;
cc->chr_write = msmouse_chr_write;
cc->chr_accept_input = msmouse_chr_accept_input;
cc->chr_free = msmouse_chr_free;
}
static const TypeInfo char_msmouse_type_info = {
.name = TYPE_CHARDEV_MSMOUSE,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(MouseChardev),
.instance_finalize = char_msmouse_finalize,
.class_init = char_msmouse_class_init,
};
static void register_types(void)
{
static const CharDriver driver = {
.kind = CHARDEV_BACKEND_KIND_MSMOUSE,
};
register_char_driver(&driver);
type_register_static(&char_msmouse_type_info);
}

View File

@@ -123,11 +123,6 @@ static const TypeInfo char_testdev_type_info = {
static void register_types(void)
{
static const CharDriver driver = {
.kind = CHARDEV_BACKEND_KIND_TESTDEV,
};
register_char_driver(&driver);
type_register_static(&char_testdev_type_info);
}

10
backends/trace-events Normal file
View File

@@ -0,0 +1,10 @@
# See docs/tracing.txt for syntax documentation.
# backends/wctablet.c
wct_init(void) ""
wct_cmd_re(void) ""
wct_cmd_st(void) ""
wct_cmd_sp(void) ""
wct_cmd_ts(int input) "0x%02x"
wct_cmd_other(const char *cmd) "%s"
wct_speed(int speed) "%d"

369
backends/wctablet.c Normal file
View File

@@ -0,0 +1,369 @@
/*
* QEMU Wacom Penpartner serial tablet emulation
*
* some protocol details:
* http://linuxwacom.sourceforge.net/wiki/index.php/Serial_Protocol_IV
*
* Copyright (c) 2016 Anatoli Huseu1
* Copyright (c) 2016,17 Gerd Hoffmann
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "ui/console.h"
#include "ui/input.h"
#include "trace.h"
#define WC_OUTPUT_BUF_MAX_LEN 512
#define WC_COMMAND_MAX_LEN 60
#define WC_L7(n) ((n) & 127)
#define WC_M7(n) (((n) >> 7) & 127)
#define WC_H2(n) ((n) >> 14)
#define WC_L4(n) ((n) & 15)
#define WC_H4(n) (((n) >> 4) & 15)
/* Model string and config string */
#define WC_MODEL_STRING_LENGTH 18
uint8_t WC_MODEL_STRING[WC_MODEL_STRING_LENGTH + 1] = "~#CT-0045R,V1.3-5,";
#define WC_CONFIG_STRING_LENGTH 8
uint8_t WC_CONFIG_STRING[WC_CONFIG_STRING_LENGTH + 1] = "96,N,8,0";
#define WC_FULL_CONFIG_STRING_LENGTH 61
uint8_t WC_FULL_CONFIG_STRING[WC_FULL_CONFIG_STRING_LENGTH + 1] = {
0x5c, 0x39, 0x36, 0x2c, 0x4e, 0x2c, 0x38, 0x2c,
0x31, 0x28, 0x01, 0x24, 0x57, 0x41, 0x43, 0x30,
0x30, 0x34, 0x35, 0x5c, 0x5c, 0x50, 0x45, 0x4e, 0x5c,
0x57, 0x41, 0x43, 0x30, 0x30, 0x30, 0x30, 0x5c,
0x54, 0x61, 0x62, 0x6c, 0x65, 0x74, 0x0d, 0x0a,
0x43, 0x54, 0x2d, 0x30, 0x30, 0x34, 0x35, 0x52,
0x2c, 0x56, 0x31, 0x2e, 0x33, 0x2d, 0x35, 0x0d,
0x0a, 0x45, 0x37, 0x29
};
/* This structure is used to save private info for Wacom Tablet. */
typedef struct {
Chardev parent;
QemuInputHandlerState *hs;
/* Query string from serial */
uint8_t query[100];
int query_index;
/* Command to be sent to serial port */
uint8_t outbuf[WC_OUTPUT_BUF_MAX_LEN];
int outlen;
int line_speed;
bool send_events;
int axis[INPUT_AXIS__MAX];
bool btns[INPUT_BUTTON__MAX];
} TabletChardev;
#define TYPE_CHARDEV_WCTABLET "chardev-wctablet"
#define WCTABLET_CHARDEV(obj) \
OBJECT_CHECK(TabletChardev, (obj), TYPE_CHARDEV_WCTABLET)
static void wctablet_chr_accept_input(Chardev *chr);
static void wctablet_shift_input(TabletChardev *tablet, int count)
{
tablet->query_index -= count;
memmove(tablet->query, tablet->query + count, tablet->query_index);
tablet->query[tablet->query_index] = 0;
}
static void wctablet_queue_output(TabletChardev *tablet, uint8_t *buf, int count)
{
if (tablet->outlen + count > sizeof(tablet->outbuf)) {
return;
}
memcpy(tablet->outbuf + tablet->outlen, buf, count);
tablet->outlen += count;
wctablet_chr_accept_input(CHARDEV(tablet));
}
static void wctablet_reset(TabletChardev *tablet)
{
/* clear buffers */
tablet->query_index = 0;
tablet->outlen = 0;
/* reset state */
tablet->send_events = false;
}
static void wctablet_queue_event(TabletChardev *tablet)
{
uint8_t codes[8] = { 0xe0, 0, 0, 0, 0, 0, 0 };
if (tablet->line_speed != 9600) {
return;
}
int newX = tablet->axis[INPUT_AXIS_X] * 0.1537;
int nexY = tablet->axis[INPUT_AXIS_Y] * 0.1152;
codes[0] = codes[0] | WC_H2(newX);
codes[1] = codes[1] | WC_M7(newX);
codes[2] = codes[2] | WC_L7(newX);
codes[3] = codes[3] | WC_H2(nexY);
codes[4] = codes[4] | WC_M7(nexY);
codes[5] = codes[5] | WC_L7(nexY);
if (tablet->btns[INPUT_BUTTON_LEFT]) {
codes[0] = 0xa0;
}
wctablet_queue_output(tablet, codes, 7);
}
static void wctablet_input_event(DeviceState *dev, QemuConsole *src,
InputEvent *evt)
{
TabletChardev *tablet = (TabletChardev *)dev;
InputMoveEvent *move;
InputBtnEvent *btn;
switch (evt->type) {
case INPUT_EVENT_KIND_ABS:
move = evt->u.abs.data;
tablet->axis[move->axis] = move->value;
break;
case INPUT_EVENT_KIND_BTN:
btn = evt->u.btn.data;
tablet->btns[btn->button] = btn->down;
break;
default:
/* keep gcc happy */
break;
}
}
static void wctablet_input_sync(DeviceState *dev)
{
TabletChardev *tablet = (TabletChardev *)dev;
if (tablet->send_events) {
wctablet_queue_event(tablet);
}
}
static QemuInputHandler wctablet_handler = {
.name = "QEMU Wacome Pen Tablet",
.mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS,
.event = wctablet_input_event,
.sync = wctablet_input_sync,
};
static void wctablet_chr_accept_input(Chardev *chr)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
int len, canWrite;
canWrite = qemu_chr_be_can_write(chr);
len = canWrite;
if (len > tablet->outlen) {
len = tablet->outlen;
}
if (len) {
qemu_chr_be_write(chr, tablet->outbuf, len);
tablet->outlen -= len;
if (tablet->outlen) {
memmove(tablet->outbuf, tablet->outbuf + len, tablet->outlen);
}
}
}
static int wctablet_chr_write(struct Chardev *chr,
const uint8_t *buf, int len)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
unsigned int i, clen;
char *pos;
if (tablet->line_speed != 9600) {
return len;
}
for (i = 0; i < len && tablet->query_index < sizeof(tablet->query) - 1; i++) {
tablet->query[tablet->query_index++] = buf[i];
}
tablet->query[tablet->query_index] = 0;
while (tablet->query_index > 0 && (tablet->query[0] == '@' ||
tablet->query[0] == '\r' ||
tablet->query[0] == '\n')) {
wctablet_shift_input(tablet, 1);
}
if (!tablet->query_index) {
return len;
}
if (strncmp((char *)tablet->query, "~#", 2) == 0) {
/* init / detect sequence */
trace_wct_init();
wctablet_shift_input(tablet, 2);
wctablet_queue_output(tablet, WC_MODEL_STRING,
WC_MODEL_STRING_LENGTH);
return len;
}
/* detect line */
pos = strchr((char *)tablet->query, '\r');
if (!pos) {
pos = strchr((char *)tablet->query, '\n');
}
if (!pos) {
return len;
}
clen = pos - (char *)tablet->query;
/* process commands */
if (strncmp((char *)tablet->query, "RE", 2) == 0 &&
clen == 2) {
trace_wct_cmd_re();
wctablet_shift_input(tablet, 3);
wctablet_queue_output(tablet, WC_CONFIG_STRING,
WC_CONFIG_STRING_LENGTH);
} else if (strncmp((char *)tablet->query, "ST", 2) == 0 &&
clen == 2) {
trace_wct_cmd_st();
wctablet_shift_input(tablet, 3);
tablet->send_events = true;
wctablet_queue_event(tablet);
} else if (strncmp((char *)tablet->query, "SP", 2) == 0 &&
clen == 2) {
trace_wct_cmd_sp();
wctablet_shift_input(tablet, 3);
tablet->send_events = false;
} else if (strncmp((char *)tablet->query, "TS", 2) == 0 &&
clen == 3) {
unsigned int input = tablet->query[2];
uint8_t codes[7] = {
0xa3,
((input & 0x80) == 0) ? 0x7e : 0x7f,
(((WC_H4(input) & 0x7) ^ 0x5) << 4) | (WC_L4(input) ^ 0x7),
0x03,
0x7f,
0x7f,
0x00,
};
trace_wct_cmd_ts(input);
wctablet_shift_input(tablet, 4);
wctablet_queue_output(tablet, codes, 7);
} else {
tablet->query[clen] = 0; /* terminate line for printing */
trace_wct_cmd_other((char *)tablet->query);
wctablet_shift_input(tablet, clen + 1);
}
return len;
}
static int wctablet_chr_ioctl(Chardev *chr, int cmd, void *arg)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
QEMUSerialSetParams *ssp;
switch (cmd) {
case CHR_IOCTL_SERIAL_SET_PARAMS:
ssp = arg;
if (tablet->line_speed != ssp->speed) {
trace_wct_speed(ssp->speed);
wctablet_reset(tablet);
tablet->line_speed = ssp->speed;
}
break;
default:
return -ENOTSUP;
}
return 0;
}
static void wctablet_chr_finalize(Object *obj)
{
TabletChardev *tablet = WCTABLET_CHARDEV(obj);
qemu_input_handler_unregister(tablet->hs);
g_free(tablet);
}
static void wctablet_chr_open(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
TabletChardev *tablet = WCTABLET_CHARDEV(chr);
*be_opened = true;
/* init state machine */
memcpy(tablet->outbuf, WC_FULL_CONFIG_STRING, WC_FULL_CONFIG_STRING_LENGTH);
tablet->outlen = WC_FULL_CONFIG_STRING_LENGTH;
tablet->query_index = 0;
tablet->hs = qemu_input_handler_register((DeviceState *)tablet,
&wctablet_handler);
}
static void wctablet_chr_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->open = wctablet_chr_open;
cc->chr_write = wctablet_chr_write;
cc->chr_ioctl = wctablet_chr_ioctl;
cc->chr_accept_input = wctablet_chr_accept_input;
}
static const TypeInfo wctablet_type_info = {
.name = TYPE_CHARDEV_WCTABLET,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(TabletChardev),
.instance_finalize = wctablet_chr_finalize,
.class_init = wctablet_chr_class_init,
};
static void register_types(void)
{
type_register_static(&wctablet_type_info);
}
type_init(register_types);

View File

@@ -29,7 +29,7 @@
#include "exec/cpu-common.h"
#include "sysemu/kvm.h"
#include "sysemu/balloon.h"
#include "trace.h"
#include "trace-root.h"
#include "qmp-commands.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"

870
block.c

File diff suppressed because it is too large Load Diff

View File

@@ -64,7 +64,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
retry = false;
QLIST_FOREACH(req, &job->inflight_reqs, list) {
if (end > req->start && start < req->end) {
qemu_co_queue_wait(&req->wait_queue);
qemu_co_queue_wait(&req->wait_queue, NULL);
retry = true;
break;
}
@@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
job = block_job_create(job_id, &backup_job_driver, bs, speed,
creation_flags, cb, opaque, errp);
/* job->common.len is fixed, so we can't allow resize */
job = block_job_create(job_id, &backup_job_driver, bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
speed, creation_flags, cb, opaque, errp);
if (!job) {
goto error;
}
job->target = blk_new();
blk_insert_bs(job->target, target);
/* The target must match the source in size, so no resize here either */
job->target = blk_new(BLK_PERM_WRITE,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
ret = blk_insert_bs(job->target, target, errp);
if (ret < 0) {
goto error;
}
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
@@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
block_job_add_bdrv(&job->common, target);
/* Required permissions are already taken with target's blk_new() */
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
job->common.len = len;
block_job_txn_add_job(txn, &job->common);

View File

@@ -405,12 +405,6 @@ out:
return ret;
}
static void error_callback_bh(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co);
}
static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
{
BDRVBlkdebugState *s = bs->opaque;
@@ -423,8 +417,7 @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
}
if (!immediately) {
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
qemu_coroutine_self());
aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
qemu_coroutine_yield();
}
@@ -670,7 +663,7 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -741,6 +734,8 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_reopen_prepare = blkdebug_reopen_prepare,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,

View File

@@ -60,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
static void blkreplay_bh_cb(void *opaque)
{
Request *req = opaque;
qemu_coroutine_enter(req->co);
aio_co_wake(req->co);
qemu_bh_delete(req->bh);
g_free(req);
}
@@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_file_open = blkreplay_open,
.bdrv_close = blkreplay_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkreplay_getlength,
.bdrv_co_preadv = blkreplay_co_preadv,

View File

@@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = {
.bdrv_parse_filename = blkverify_parse_filename,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkverify_getlength,
.bdrv_refresh_filename = blkverify_refresh_filename,

View File

@@ -59,6 +59,9 @@ struct BlockBackend {
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
uint64_t perm;
uint64_t shared_perm;
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
@@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
static char *blk_root_get_parent_desc(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
char *dev_id;
if (blk->name) {
return g_strdup(blk->name);
}
dev_id = blk_get_attached_dev_id(blk);
if (*dev_id) {
return dev_id;
} else {
/* TODO Callback into the BB owner for something more detailed */
g_free(dev_id);
return g_strdup("a block device");
}
}
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
@@ -110,6 +133,7 @@ static const BdrvChildRole child_root = {
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
.get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
@@ -117,15 +141,23 @@ static const BdrvChildRole child_root = {
/*
* Create a new BlockBackend with a reference count of one.
* Store an error through @errp on failure, unless it's null.
*
* @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
* to request for a block driver node that is attached to this BlockBackend.
* @shared_perm is a bitmask which describes which permissions may be granted
* to other users of the attached node.
* Both sets of permissions can be changed later using blk_set_perm().
*
* Return the new BlockBackend on success, null on failure.
*/
BlockBackend *blk_new(void)
BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
blk->perm = perm;
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
@@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
{
BlockBackend *blk;
BlockDriverState *bs;
uint64_t perm;
blk = blk_new();
/* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a concern because the BDS stays private, so we
* just request permission according to the flags.
*
* The exceptions are xen_disk and blockdev_init(); in these cases, the
* caller of blk_new_open() doesn't make use of the permissions, but they
* shouldn't hurt either. We can still share everything here because the
* guest devices will add their own blockers if they can't share. */
perm = BLK_PERM_CONSISTENT_READ;
if (flags & BDRV_O_RDWR) {
perm |= BLK_PERM_WRITE;
}
if (flags & BDRV_O_RESIZE) {
perm |= BLK_PERM_RESIZE;
}
blk = blk_new(perm, BLK_PERM_ALL);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
return NULL;
}
blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
perm, BLK_PERM_ALL, blk, &error_abort);
return blk;
}
@@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk)
/*
* Associates a new BlockDriverState with @blk.
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
blk->perm, blk->shared_perm, blk, errp);
if (blk->root == NULL) {
return -EPERM;
}
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
return 0;
}
/*
* Sets the permission bitmasks that the user of the BlockBackend needs.
*/
int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp)
{
int ret;
if (blk->root) {
ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
if (ret < 0) {
return ret;
}
}
blk->perm = perm;
blk->shared_perm = shared_perm;
return 0;
}
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
*perm = blk->perm;
*shared_perm = blk->shared_perm;
}
static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
blk->guest_block_size = 512;
blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
@@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
/*
* Notify @blk's attached device model of media change.
* If @load is true, notify of media load.
* Else, notify of media eject.
*
* If @load is true, notify of media load. This action can fail, meaning that
* the medium cannot be loaded. @errp is set then.
*
* If @load is false, notify of media eject. This can never fail.
*
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
Error *local_err = NULL;
assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
blk->dev_ops->change_media_cb(blk->dev_opaque, load);
blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
if (local_err) {
assert(load == true);
error_propagate(errp, local_err);
return;
}
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
@@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
static void blk_root_change_media(BdrvChild *child, bool load)
{
blk_dev_change_media_cb(child->opaque, load);
blk_dev_change_media_cb(child->opaque, load, NULL);
}
/*
@@ -880,7 +974,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
{
QEMUIOVector qiov;
struct iovec iov;
Coroutine *co;
BlkRwCo rwco;
iov = (struct iovec) {
@@ -897,9 +990,14 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
co_entry(&rwco);
} else {
Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
return rwco.ret;
}
@@ -979,7 +1077,6 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
static void blk_aio_complete_bh(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
assert(acb->has_returned);
blk_aio_complete(acb);
}
@@ -1602,7 +1699,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
return -ENOMEDIUM;
}
return bdrv_truncate(blk_bs(blk), offset);
return bdrv_truncate(blk->root, offset);
}
static void blk_pdiscard_entry(void *opaque)

View File

@@ -104,6 +104,12 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
struct bochs_header bochs;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->read_only = true; /* no write support yet */
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
@@ -287,6 +293,7 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = bochs_refresh_limits,
.bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,

View File

@@ -66,6 +66,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t offsets_size, max_compressed_block_size = 1, i;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->read_only = true;
/* read header */
@@ -284,6 +290,7 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = cloop_refresh_limits,
.bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,

View File

@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *active;
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockdevOnError on_error;
@@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque)
BlockDriverState *active = s->active;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
int ret = data->ret;
bool remove_commit_top_bs = false;
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
blk_unref(s->base);
if (!block_job_is_cancelled(&s->common) && ret == 0) {
/* success */
ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
s->backing_file_str);
} else if (overlay_bs) {
/* XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */
remove_commit_top_bs = true;
}
/* restore base open flags here if appropriate (e.g., change the base back
@@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque)
}
g_free(s->backing_file_str);
blk_unref(s->top);
blk_unref(s->base);
block_job_completed(&s->common, ret);
g_free(data);
/* If bdrv_drop_intermediate() didn't already do that, remove the commit
* filter driver from the backing chain. Do this as the final step so that
* the 'consistent read' permission can be granted. */
if (remove_commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
}
static void coroutine_fn commit_run(void *opaque)
@@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = {
.start = commit_run,
};
static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
static void bdrv_commit_top_close(BlockDriverState *bs)
{
}
static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
*nperm = 0;
*nshared = BLK_PERM_ALL;
}
/* Dummy node that provides consistent read to its users without requiring it
* from its backing file and that allows writes on the backing file chain. */
static BlockDriver bdrv_commit_top = {
.format_name = "commit_top",
.bdrv_co_preadv = bdrv_commit_top_preadv,
.bdrv_close = bdrv_commit_top_close,
.bdrv_child_perm = bdrv_commit_top_child_perm,
};
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
Error **errp)
const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
@@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *overlay_bs;
BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
int ret;
assert(top != bs);
if (top == base) {
@@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
}
s = block_job_create(job_id, &commit_job_driver, bs, speed,
BLOCK_JOB_DEFAULT, NULL, NULL, errp);
s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
@@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs,
bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
block_job_unref(&s->common);
return;
goto fail;
}
}
/* Insert commit_top block node above top, so we can block consistent read
* on the backing chain below it */
commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
errp);
if (commit_top_bs == NULL) {
goto fail;
}
bdrv_set_backing_hd(commit_top_bs, top, &error_abort);
bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort);
s->commit_top_bs = commit_top_bs;
bdrv_unref(commit_top_bs);
/* Block all nodes between top and base, because they will
* disappear from the chain after this operation. */
assert(bdrv_chain_contains(top, base));
for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
for (iter = top; iter != base; iter = backing_bs(iter)) {
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
* at s->base (if writes are blocked for a node, they are also blocked
* for its backing file). The other options would be a second filter
* driver above s->base. */
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
errp);
if (ret < 0) {
goto fail;
}
}
ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
}
/* overlay_bs must be blocked because it needs to be modified to
* update the backing image string, but if it's the root node then
* don't block it again */
if (bs != overlay_bs) {
block_job_add_bdrv(&s->common, overlay_bs);
* update the backing image string. */
ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
}
s->base = blk_new();
blk_insert_bs(s->base, base);
s->base = blk_new(BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_RESIZE,
BLK_PERM_CONSISTENT_READ
| BLK_PERM_GRAPH_MOD
| BLK_PERM_WRITE_UNCHANGED);
ret = blk_insert_bs(s->base, base, errp);
if (ret < 0) {
goto fail;
}
s->top = blk_new();
blk_insert_bs(s->top, top);
/* Required permissions are already taken with block_job_add_bdrv() */
s->top = blk_new(0, BLK_PERM_ALL);
blk_insert_bs(s->top, top, errp);
if (ret < 0) {
goto fail;
}
s->active = bs;
@@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
trace_commit_start(bs, base, top, s);
block_job_start(&s->common);
return;
fail:
if (s->base) {
blk_unref(s->base);
}
if (s->top) {
blk_unref(s->top);
}
if (commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
block_job_unref(&s->common);
}
@@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int bdrv_commit(BlockDriverState *bs)
{
BlockBackend *src, *backing;
BlockDriverState *backing_file_bs = NULL;
BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int ret = 0;
uint8_t *buf = NULL;
Error *local_err = NULL;
if (!drv)
return -ENOMEDIUM;
@@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs)
}
}
src = blk_new();
blk_insert_bs(src, bs);
src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
backing = blk_new();
blk_insert_bs(backing, bs->backing->bs);
ret = blk_insert_bs(src, bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto ro_cleanup;
}
/* Insert commit_top block node above backing, so we can write to it */
backing_file_bs = backing_bs(bs);
commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
&local_err);
if (commit_top_bs == NULL) {
error_report_err(local_err);
goto ro_cleanup;
}
bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
ret = blk_insert_bs(backing, backing_file_bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto ro_cleanup;
}
length = blk_getlength(src);
if (length < 0) {
@@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs)
ro_cleanup:
qemu_vfree(buf);
blk_unref(src);
blk_unref(backing);
if (backing_file_bs) {
bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
}
bdrv_unref(commit_top_bs);
blk_unref(src);
if (ro) {
/* ignoring error return here */

View File

@@ -300,6 +300,12 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
QCryptoBlockOpenOptions *open_opts = NULL;
unsigned int cflags = 0;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -383,7 +389,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
offset += payload_offset;
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static void block_crypto_close(BlockDriverState *bs)
@@ -622,6 +628,7 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_probe = block_crypto_probe_luks,
.bdrv_open = block_crypto_open_luks,
.bdrv_close = block_crypto_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = block_crypto_create_luks,
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,

View File

@@ -135,6 +135,7 @@ typedef struct BDRVCURLState {
char *cookie;
bool accept_range;
AioContext *aio_context;
QemuMutex mutex;
char *username;
char *password;
char *proxyusername;
@@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
return FIND_RET_NONE;
}
/* Called with s->mutex held. */
static void curl_multi_check_completion(BDRVCURLState *s)
{
int msgs_in_queue;
@@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
continue;
}
qemu_mutex_unlock(&s->mutex);
acb->common.cb(acb->common.opaque, -EPROTO);
qemu_mutex_lock(&s->mutex);
qemu_aio_unref(acb);
state->acb[i] = NULL;
}
@@ -386,9 +390,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
}
}
static void curl_multi_do(void *arg)
/* Called with s->mutex held. */
static void curl_multi_do_locked(CURLState *s)
{
CURLState *s = (CURLState *)arg;
CURLSocket *socket, *next_socket;
int running;
int r;
@@ -406,12 +410,23 @@ static void curl_multi_do(void *arg)
}
}
static void curl_multi_do(void *arg)
{
CURLState *s = (CURLState *)arg;
qemu_mutex_lock(&s->s->mutex);
curl_multi_do_locked(s);
qemu_mutex_unlock(&s->s->mutex);
}
static void curl_multi_read(void *arg)
{
CURLState *s = (CURLState *)arg;
curl_multi_do(arg);
qemu_mutex_lock(&s->s->mutex);
curl_multi_do_locked(s);
curl_multi_check_completion(s->s);
qemu_mutex_unlock(&s->s->mutex);
}
static void curl_multi_timeout_do(void *arg)
@@ -424,9 +439,11 @@ static void curl_multi_timeout_do(void *arg)
return;
}
qemu_mutex_lock(&s->mutex);
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_check_completion(s);
qemu_mutex_unlock(&s->mutex);
#else
abort();
#endif
@@ -759,6 +776,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
curl_easy_cleanup(state->curl);
state->curl = NULL;
qemu_mutex_init(&s->mutex);
curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
qemu_opts_del(opts);
@@ -784,13 +802,17 @@ static void curl_readv_bh_cb(void *p)
{
CURLState *state;
int running;
int ret = -EINPROGRESS;
CURLAIOCB *acb = p;
BDRVCURLState *s = acb->common.bs->opaque;
BlockDriverState *bs = acb->common.bs;
BDRVCURLState *s = bs->opaque;
size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
size_t end;
qemu_mutex_lock(&s->mutex);
// In case we have the requested data already (e.g. read-ahead),
// we can just call the callback and be done.
switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
@@ -798,7 +820,7 @@ static void curl_readv_bh_cb(void *p)
qemu_aio_unref(acb);
// fall through
case FIND_RET_WAIT:
return;
goto out;
default:
break;
}
@@ -806,9 +828,8 @@ static void curl_readv_bh_cb(void *p)
// No cache found, so let's start a new request
state = curl_init_state(acb->common.bs, s);
if (!state) {
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_unref(acb);
return;
ret = -EIO;
goto out;
}
acb->start = 0;
@@ -822,9 +843,8 @@ static void curl_readv_bh_cb(void *p)
state->orig_buf = g_try_malloc(state->buf_len);
if (state->buf_len && state->orig_buf == NULL) {
curl_clean_state(state);
acb->common.cb(acb->common.opaque, -ENOMEM);
qemu_aio_unref(acb);
return;
ret = -ENOMEM;
goto out;
}
state->acb[0] = acb;
@@ -837,6 +857,13 @@ static void curl_readv_bh_cb(void *p)
/* Tell curl it needs to kick things off */
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
out:
qemu_mutex_unlock(&s->mutex);
if (ret != -EINPROGRESS) {
acb->common.cb(acb->common.opaque, ret);
qemu_aio_unref(acb);
}
}
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
@@ -861,6 +888,7 @@ static void curl_close(BlockDriverState *bs)
DPRINTF("CURL: Close\n");
curl_detach_aio_context(bs);
qemu_mutex_destroy(&s->mutex);
g_free(s->cookie);
g_free(s->url);

View File

@@ -413,6 +413,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
int64_t offset;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
block_module_load_one("dmg-bz2");
bs->read_only = true;
@@ -691,6 +697,7 @@ static BlockDriver bdrv_dmg = {
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_refresh_limits = dmg_refresh_limits,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};

View File

@@ -1591,18 +1591,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
#endif
}
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
goto out_close;
}
switch (prealloc) {
#ifdef CONFIG_POSIX_FALLOCATE
case PREALLOC_MODE_FALLOC:
/* posix_fallocate() doesn't set errno. */
/*
* Truncating before posix_fallocate() makes it about twice slower on
* file systems that do not support fallocate(), trying to check if a
* block is allocated before allocating it, so don't do that here.
*/
result = -posix_fallocate(fd, 0, total_size);
if (result != 0) {
/* posix_fallocate() doesn't set errno. */
error_setg_errno(errp, -result,
"Could not preallocate data for the new file");
}
@@ -1610,6 +1609,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
#endif
case PREALLOC_MODE_FULL:
{
/*
* Knowing the final size from the beginning could allow the file
* system driver to do less allocations and possibly avoid
* fragmentation of the file.
*/
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
goto out_close;
}
int64_t num = 0, left = total_size;
buf = g_malloc0(65536);
@@ -1636,6 +1646,10 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
break;
}
case PREALLOC_MODE_OFF:
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
break;
default:
result = -EINVAL;

View File

@@ -698,13 +698,6 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
return qemu_gluster_glfs_init(gconf, errp);
}
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
qemu_coroutine_enter(acb->coroutine);
}
/*
* AIO callback routine called from GlusterFS thread.
*/
@@ -720,7 +713,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
acb->ret = -EIO; /* Partial read/write - fail it */
}
aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
aio_co_schedule(acb->aio_context, acb->coroutine);
}
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)

View File

@@ -189,7 +189,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
bdrv_dec_in_flight(bs);
bdrv_drained_begin(bs);
data->done = true;
qemu_coroutine_enter(co);
aio_co_wake(co);
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
@@ -539,7 +539,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
qemu_co_queue_wait(&req->wait_queue);
qemu_co_queue_wait(&req->wait_queue, NULL);
self->waiting_for = NULL;
retry = true;
waited = true;
@@ -813,7 +813,7 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
CoroutineIOCompletion *co = opaque;
co->ret = ret;
qemu_coroutine_enter(co->coroutine);
aio_co_wake(co->coroutine);
}
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
@@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
BlockDriverState *bs = child->bs;
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
* modifying the image file. This is critical for zero-copy guest I/O
@@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
size_t skip_bytes;
int ret;
assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
@@ -1001,10 +1005,11 @@ err:
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
int ret = 0;
uint64_t bytes_remaining = bytes;
@@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (!ret || pnum != nb_sectors) {
ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
}
@@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
}
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
tracked_request_end(&req);
@@ -1306,10 +1311,11 @@ fail:
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
bool waited;
int ret;
@@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
assert(child->perm & BLK_PERM_WRITE);
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
@@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
return ret;
}
static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
int64_t offset,
unsigned int bytes,
BdrvRequestFlags flags,
BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
@@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
@@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
if (bytes >= align) {
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
@@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, req, offset, align,
ret = bdrv_aligned_preadv(child, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
@@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (!qiov) {
ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
goto out;
}
@@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
align, &tail_qiov, 0);
ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
align, align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
@@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
bytes = ROUND_UP(bytes, align);
}
ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
@@ -2080,6 +2089,11 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
if (acb->aiocb_info->get_aio_context) {
aio_poll(acb->aiocb_info->get_aio_context(acb), true);
} else if (acb->bs) {
/* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
* assert that we're not using an I/O thread. Thread-safe
* code should use bdrv_aio_cancel_async exclusively.
*/
assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
aio_poll(bdrv_get_aio_context(acb->bs), true);
} else {
abort();
@@ -2239,35 +2253,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
return &acb->common;
}
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
BlockAIOCB *acb;
acb = g_malloc(aiocb_info->aiocb_size);
acb->aiocb_info = aiocb_info;
acb->bs = bs;
acb->cb = cb;
acb->opaque = opaque;
acb->refcnt = 1;
return acb;
}
void qemu_aio_ref(void *p)
{
BlockAIOCB *acb = p;
acb->refcnt++;
}
void qemu_aio_unref(void *p)
{
BlockAIOCB *acb = p;
assert(acb->refcnt > 0);
if (--acb->refcnt == 0) {
g_free(acb);
}
}
/**************************************************************/
/* Coroutine block device emulation */
@@ -2299,7 +2284,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
qemu_co_queue_wait(&bs->flush_queue);
qemu_co_queue_wait(&bs->flush_queue, NULL);
}
bs->active_flush_req = true;

View File

@@ -58,6 +58,7 @@ typedef struct IscsiLun {
int events;
QEMUTimer *nop_timer;
QEMUTimer *event_timer;
QemuMutex mutex;
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
@@ -165,8 +166,9 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
static void iscsi_co_generic_bh_cb(void *opaque)
{
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -174,7 +176,7 @@ static void iscsi_retry_timer_expired(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
if (iTask->co) {
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
}
@@ -251,6 +253,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
return ret;
}
/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -351,6 +354,7 @@ static const AIOCBInfo iscsi_aiocb_info = {
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
/* Called with QemuMutex held. */
static void
iscsi_set_events(IscsiLun *iscsilun)
{
@@ -394,8 +398,10 @@ iscsi_process_read(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_mutex_lock(&iscsilun->mutex);
iscsi_service(iscsi, POLLIN);
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
}
static void
@@ -404,8 +410,10 @@ iscsi_process_write(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_mutex_lock(&iscsilun->mutex);
iscsi_service(iscsi, POLLOUT);
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
}
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -584,6 +592,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
uint64_t lba;
uint32_t num_sectors;
bool fua = flags & BDRV_REQ_FUA;
int r = 0;
if (fua) {
assert(iscsilun->dpofua);
@@ -599,6 +608,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
@@ -635,7 +645,9 @@ retry:
#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -650,12 +662,15 @@ retry:
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
@@ -688,18 +703,21 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
goto out;
}
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
sector_qemu2lun(sector_num, iscsilun),
8 + 16, iscsi_co_generic_cb,
&iTask) == NULL) {
ret = -ENOMEM;
goto out;
goto out_unlock;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.do_retry) {
@@ -716,20 +734,20 @@ retry:
* because the device is busy or the cmd is not
* supported) we pretend all blocks are allocated
* for backwards compatibility */
goto out;
goto out_unlock;
}
lbas = scsi_datain_unmarshall(iTask.task);
if (lbas == NULL) {
ret = -EIO;
goto out;
goto out_unlock;
}
lbasd = &lbas->descriptors[0];
if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
ret = -EIO;
goto out;
goto out_unlock;
}
*pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
@@ -751,6 +769,8 @@ retry:
if (*pnum > nb_sectors) {
*pnum = nb_sectors;
}
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
out:
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
@@ -813,6 +833,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
@@ -850,7 +871,9 @@ retry:
#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -862,6 +885,7 @@ retry:
iTask.complete = 0;
goto retry;
}
qemu_mutex_unlock(&iscsilun->mutex);
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
@@ -876,6 +900,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
struct IscsiTask iTask;
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
0, iscsi_co_generic_cb, &iTask) == NULL) {
@@ -884,7 +909,9 @@ retry:
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -896,6 +923,7 @@ retry:
iTask.complete = 0;
goto retry;
}
qemu_mutex_unlock(&iscsilun->mutex);
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
@@ -905,6 +933,7 @@ retry:
}
#ifdef __linux__
/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -1029,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
acb->task->expxferlen = acb->ioh->dxfer_len;
data.size = 0;
qemu_mutex_lock(&iscsilun->mutex);
if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
if (acb->ioh->iovec_count == 0) {
data.data = acb->ioh->dxferp;
@@ -1044,6 +1074,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
iscsi_aio_ioctl_cb,
(data.size > 0) ? &data : NULL,
acb) != 0) {
qemu_mutex_unlock(&iscsilun->mutex);
scsi_free_scsi_task(acb->task);
qemu_aio_unref(acb);
return NULL;
@@ -1063,6 +1094,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
}
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
return &acb->common;
}
@@ -1087,6 +1119,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
return -ENOTSUP;
@@ -1101,15 +1134,19 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
list.num = count / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
iscsi_co_generic_cb, &iTask) == NULL) {
return -ENOMEM;
r = -ENOMEM;
goto out_unlock;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -1126,17 +1163,20 @@ retry:
/* the target might fail with a check condition if it
is not happy with the alignment of the UNMAP request
we silently fail in this case */
return 0;
goto out_unlock;
}
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
static int
@@ -1148,6 +1188,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
uint64_t lba;
uint32_t nb_blocks;
bool use_16_for_ws = iscsilun->use_16_for_rw;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
return -ENOTSUP;
@@ -1181,6 +1222,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
}
}
qemu_mutex_lock(&iscsilun->mutex);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (use_16_for_ws) {
@@ -1200,7 +1242,9 @@ retry:
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1210,7 +1254,8 @@ retry:
/* WRITE SAME is not supported by the target */
iscsilun->has_write_same = false;
scsi_free_scsi_task(iTask.task);
return -ENOTSUP;
r = -ENOTSUP;
goto out_unlock;
}
if (iTask.task != NULL) {
@@ -1226,7 +1271,8 @@ retry:
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1237,32 +1283,19 @@ retry:
count >> BDRV_SECTOR_BITS);
}
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
static void parse_chap(struct iscsi_context *iscsi, const char *target,
static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
const char *secretid;
char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (!user) {
return;
@@ -1293,64 +1326,36 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *digest = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
digest = qemu_opt_get(opts, "header-digest");
if (!digest) {
return;
}
if (!strcmp(digest, "CRC32C")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else if (!strcmp(digest, "crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
} else if (!strcmp(digest, "NONE")) {
} else if (!strcmp(digest, "none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
} else if (!strcmp(digest, "CRC32C-NONE")) {
} else if (!strcmp(digest, "crc32c-none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
} else if (!strcmp(digest, "NONE-CRC32C")) {
} else if (!strcmp(digest, "none-crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else {
error_setg(errp, "Invalid header-digest setting : %s", digest);
}
}
static char *parse_initiator_name(const char *target)
static char *get_initiator_name(QemuOpts *opts)
{
QemuOptsList *list;
QemuOpts *opts;
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
}
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
uuid_info = qmp_query_uuid(NULL);
@@ -1365,43 +1370,24 @@ static char *parse_initiator_name(const char *target)
return iscsi_name;
}
static int parse_timeout(const char *target)
{
QemuOptsList *list;
QemuOpts *opts;
const char *timeout;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
return atoi(timeout);
}
}
}
return 0;
}
static void iscsi_nop_timed_event(void *opaque)
{
IscsiLun *iscsilun = opaque;
qemu_mutex_lock(&iscsilun->mutex);
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
error_report("iSCSI: NOP timeout. Reconnecting...");
iscsilun->request_timed_out = true;
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
return;
goto out;
}
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
iscsi_set_events(iscsilun);
out:
qemu_mutex_unlock(&iscsilun->mutex);
}
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1474,20 +1460,6 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
}
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the iscsi image",
},
{ /* end of list */ }
},
};
static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
int evpd, int pc, void **inq, Error **errp)
{
@@ -1605,24 +1577,178 @@ out:
}
}
static void iscsi_parse_iscsi_option(const char *target, QDict *options)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user, *password, *password_secret, *initiator_name,
*header_digest, *timeout;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (user) {
qdict_set_default_str(options, "user", user);
}
password = qemu_opt_get(opts, "password");
if (password) {
qdict_set_default_str(options, "password", password);
}
password_secret = qemu_opt_get(opts, "password-secret");
if (password_secret) {
qdict_set_default_str(options, "password-secret", password_secret);
}
initiator_name = qemu_opt_get(opts, "initiator-name");
if (initiator_name) {
qdict_set_default_str(options, "initiator-name", initiator_name);
}
header_digest = qemu_opt_get(opts, "header-digest");
if (header_digest) {
/* -iscsi takes upper case values, but QAPI only supports lower case
* enum constant names, so we have to convert here. */
char *qapi_value = g_ascii_strdown(header_digest, -1);
qdict_set_default_str(options, "header-digest", qapi_value);
g_free(qapi_value);
}
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
qdict_set_default_str(options, "timeout", timeout);
}
}
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
*/
static void iscsi_parse_filename(const char *filename, QDict *options,
Error **errp)
{
struct iscsi_url *iscsi_url;
const char *transport_name;
char *lun_str;
iscsi_url = iscsi_parse_full_url(NULL, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
return;
}
#if LIBISCSI_API_VERSION >= (20160603)
switch (iscsi_url->transport) {
case TCP_TRANSPORT:
transport_name = "tcp";
break;
case ISER_TRANSPORT:
transport_name = "iser";
break;
default:
error_setg(errp, "Unknown transport type (%d)",
iscsi_url->transport);
return;
}
#else
transport_name = "tcp";
#endif
qdict_set_default_str(options, "transport", transport_name);
qdict_set_default_str(options, "portal", iscsi_url->portal);
qdict_set_default_str(options, "target", iscsi_url->target);
lun_str = g_strdup_printf("%d", iscsi_url->lun);
qdict_set_default_str(options, "lun", lun_str);
g_free(lun_str);
/* User/password from -iscsi take precedence over those from the URL */
iscsi_parse_iscsi_option(iscsi_url->target, options);
if (iscsi_url->user[0] != '\0') {
qdict_set_default_str(options, "user", iscsi_url->user);
qdict_set_default_str(options, "password", iscsi_url->passwd);
}
iscsi_destroy_url(iscsi_url);
}
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "transport",
.type = QEMU_OPT_STRING,
},
{
.name = "portal",
.type = QEMU_OPT_STRING,
},
{
.name = "target",
.type = QEMU_OPT_STRING,
},
{
.name = "user",
.type = QEMU_OPT_STRING,
},
{
.name = "password",
.type = QEMU_OPT_STRING,
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
},
{
.name = "lun",
.type = QEMU_OPT_NUMBER,
},
{
.name = "initiator-name",
.type = QEMU_OPT_STRING,
},
{
.name = "header-digest",
.type = QEMU_OPT_STRING,
},
{
.name = "timeout",
.type = QEMU_OPT_NUMBER,
},
{ /* end of list */ }
},
};
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = NULL;
struct iscsi_url *iscsi_url = NULL;
struct scsi_task *task = NULL;
struct scsi_inquiry_standard *inq = NULL;
struct scsi_inquiry_supported_pages *inq_vpd;
char *initiator_name = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int i, ret = 0, timeout = 0;
const char *transport_name, *portal, *target;
#if LIBISCSI_API_VERSION >= (20160603)
enum iscsi_transport_type transport;
#endif
int i, ret = 0, timeout = 0, lun;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1632,18 +1758,34 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
filename = qemu_opt_get(opts, "filename");
transport_name = qemu_opt_get(opts, "transport");
portal = qemu_opt_get(opts, "portal");
target = qemu_opt_get(opts, "target");
lun = qemu_opt_get_number(opts, "lun", 0);
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
if (!transport_name || !portal || !target) {
error_setg(errp, "Need all of transport, portal and target options");
ret = -EINVAL;
goto out;
}
if (!strcmp(transport_name, "tcp")) {
#if LIBISCSI_API_VERSION >= (20160603)
transport = TCP_TRANSPORT;
} else if (!strcmp(transport_name, "iser")) {
transport = ISER_TRANSPORT;
#else
/* TCP is what older libiscsi versions always use */
#endif
} else {
error_setg(errp, "Unknown transport: %s", transport_name);
ret = -EINVAL;
goto out;
}
memset(iscsilun, 0, sizeof(IscsiLun));
initiator_name = parse_initiator_name(iscsi_url->target);
initiator_name = get_initiator_name(opts);
iscsi = iscsi_create_context(initiator_name);
if (iscsi == NULL) {
@@ -1652,30 +1794,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
#if LIBISCSI_API_VERSION >= (20160603)
if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
if (iscsi_init_transport(iscsi, transport)) {
error_setg(errp, ("Error initializing transport."));
ret = -EINVAL;
goto out;
}
#endif
if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
if (iscsi_set_targetname(iscsi, target)) {
error_setg(errp, "iSCSI: Failed to set target name.");
ret = -EINVAL;
goto out;
}
if (iscsi_url->user[0] != '\0') {
ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
iscsi_url->passwd);
if (ret != 0) {
error_setg(errp, "Failed to set initiator username and password");
ret = -EINVAL;
goto out;
}
}
/* check if we got CHAP username/password via the options */
parse_chap(iscsi, iscsi_url->target, &local_err);
apply_chap(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1688,10 +1820,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
/* check if we got HEADER_DIGEST via the options */
parse_header_digest(iscsi, iscsi_url->target, &local_err);
apply_header_digest(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1699,7 +1829,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
/* timeout handling is broken in libiscsi before 1.15.0 */
timeout = parse_timeout(iscsi_url->target);
timeout = qemu_opt_get_number(opts, "timeout", 0);
#if LIBISCSI_API_VERSION >= 20150621
iscsi_set_timeout(iscsi, timeout);
#else
@@ -1708,7 +1838,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
#endif
if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
iscsi_get_error(iscsi));
ret = -EINVAL;
@@ -1717,7 +1847,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
iscsilun->iscsi = iscsi;
iscsilun->aio_context = bdrv_get_aio_context(bs);
iscsilun->lun = iscsi_url->lun;
iscsilun->lun = lun;
iscsilun->has_write_same = true;
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -1803,6 +1933,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
scsi_free_scsi_task(task);
task = NULL;
qemu_mutex_init(&iscsilun->mutex);
iscsi_attach_aio_context(bs, iscsilun->aio_context);
/* Guess the internal cluster (page) size of the iscsi target by the means
@@ -1820,9 +1951,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
out:
qemu_opts_del(opts);
g_free(initiator_name);
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
if (task != NULL) {
scsi_free_scsi_task(task);
}
@@ -1851,6 +1979,7 @@ static void iscsi_close(BlockDriverState *bs)
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
iscsi_allocmap_free(iscsilun);
qemu_mutex_destroy(&iscsilun->mutex);
memset(iscsilun, 0, sizeof(IscsiLun));
}
@@ -2031,15 +2160,15 @@ static BlockDriver bdrv_iscsi = {
.format_name = "iscsi",
.protocol_name = "iscsi",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
@@ -2066,15 +2195,15 @@ static BlockDriver bdrv_iser = {
.format_name = "iser",
.protocol_name = "iser",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,

View File

@@ -54,10 +54,10 @@ struct LinuxAioState {
io_context_t ctx;
EventNotifier e;
/* io queue for submit at batch */
/* io queue for submit at batch. Protected by AioContext lock. */
LaioQueue io_q;
/* I/O completion processing */
/* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
int event_idx;
int event_max;
@@ -100,7 +100,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
* that!
*/
if (!qemu_coroutine_entered(laiocb->co)) {
qemu_coroutine_enter(laiocb->co);
aio_co_wake(laiocb->co);
}
} else {
laiocb->common.cb(laiocb->common.opaque, ret);
@@ -234,9 +234,12 @@ static void qemu_laio_process_completions(LinuxAioState *s)
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
qemu_laio_process_completions(s);
aio_context_acquire(s->aio_context);
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
aio_context_release(s->aio_context);
}
static void qemu_laio_completion_bh(void *opaque)
@@ -455,6 +458,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
{
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
qemu_bh_delete(s->completion_bh);
s->aio_context = NULL;
}
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)

View File

@@ -38,7 +38,10 @@ typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockBackend *target;
BlockDriverState *mirror_top_bs;
BlockDriverState *source;
BlockDriverState *base;
/* The name of the graph node to replace */
char *replaces;
/* The BDS to replace */
@@ -69,6 +72,7 @@ typedef struct MirrorBlockJob {
bool waiting_for_io;
int target_cluster_sectors;
int max_iov;
bool initial_zeroing_ongoing;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -117,9 +121,10 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
if (!s->initial_zeroing_ongoing) {
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
}
}
qemu_iovec_destroy(&op->qiov);
g_free(op);
@@ -132,6 +137,8 @@ static void mirror_write_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -142,12 +149,15 @@ static void mirror_write_complete(void *opaque, int ret)
}
}
mirror_iteration_done(op, ret);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static void mirror_read_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -158,10 +168,11 @@ static void mirror_read_complete(void *opaque, int ret)
}
mirror_iteration_done(op, ret);
return;
} else {
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
}
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -319,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = blk_bs(s->common.blk);
BlockDriverState *source = s->source;
int64_t sector_num, first_chunk;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
@@ -378,7 +389,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * sectors_per_chunk);
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int ret;
int64_t ret;
int io_sectors, io_sectors_acct;
BlockDriverState *file;
enum MirrorMethod {
@@ -489,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
BlockDriverState *src = blk_bs(s->common.blk);
BlockDriverState *src = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
bdrv_ref(src);
bdrv_ref(mirror_top_bs);
/* We don't access the source any more. Dropping any WRITE/RESIZE is
* required before it could become a backing file of target_bs. */
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target_bs) != backing) {
bdrv_set_backing_hd(target_bs, backing, &local_err);
if (local_err) {
error_report_err(local_err);
data->ret = -EPERM;
}
}
}
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -516,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque)
bdrv_drained_begin(target_bs);
bdrv_replace_in_backing_chain(to_replace, target_bs);
bdrv_drained_end(target_bs);
/* We just changed the BDS the job BB refers to */
blk_remove_bs(job->blk);
blk_insert_bs(job->blk, src);
}
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -532,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque)
g_free(s->replaces);
blk_unref(s->target);
s->target = NULL;
/* Remove the mirror filter driver from the graph. Before this, get rid of
* the blockers on the intermediate nodes so that the resulting state is
* valid. */
block_job_remove_all_bdrv(job);
bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
/* We just changed the BDS the job BB refers to (with either or both of the
* bdrv_replace_in_backing_chain() calls), so switch the BB back so the
* cleanup does the right thing. We don't need any permissions any more
* now. */
blk_remove_bs(job->blk);
blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
block_job_completed(&s->common, data->ret);
g_free(data);
bdrv_drained_end(src);
bdrv_unref(mirror_top_bs);
bdrv_unref(src);
}
@@ -554,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t sector_num, end;
BlockDriverState *base = s->base;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
@@ -566,6 +608,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0;
}
s->initial_zeroing_ongoing = true;
for (sector_num = 0; sector_num < end; ) {
int nb_sectors = MIN(end - sector_num,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
@@ -573,6 +616,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
s->initial_zeroing_ongoing = false;
return 0;
}
@@ -587,6 +631,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
}
mirror_wait_for_all_io(s);
s->initial_zeroing_ongoing = false;
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -633,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
int64_t length;
@@ -651,7 +696,28 @@ static void coroutine_fn mirror_run(void *opaque)
if (s->bdev_length < 0) {
ret = s->bdev_length;
goto immediate_exit;
} else if (s->bdev_length == 0) {
}
/* Active commit must resize the base image if its size differs from the
* active layer. */
if (s->base == blk_bs(s->target)) {
int64_t base_length;
base_length = blk_getlength(s->target);
if (base_length < 0) {
ret = base_length;
goto immediate_exit;
}
if (s->bdev_length > base_length) {
ret = blk_truncate(s->target, s->bdev_length);
if (ret < 0) {
goto immediate_exit;
}
}
}
if (s->bdev_length == 0) {
/* Report BLOCK_JOB_READY and wait for complete. */
block_job_event_ready(&s->common);
s->synced = true;
@@ -844,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
BlockDriverState *src, *target;
BlockDriverState *target;
src = blk_bs(job->blk);
target = blk_bs(s->target);
if (!s->synced) {
@@ -878,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp)
replace_aio_context = bdrv_get_aio_context(s->to_replace);
aio_context_acquire(replace_aio_context);
/* TODO Translate this into permission system. Current definition of
* GRAPH_MOD would require to request it for the parents; they might
* not even be BlockDriverStates, however, so a BdrvChild can't address
* them. May need redefinition of GRAPH_MOD. */
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -886,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
aio_context_release(replace_aio_context);
}
if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target) != backing) {
bdrv_set_backing_hd(target, backing);
}
}
s->should_complete = true;
block_job_enter(&s->common);
}
@@ -948,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = {
.drain = mirror_drain,
};
static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
}
static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->backing->bs);
}
static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file)
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
}
static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
return bdrv_co_pdiscard(bs->backing->bs, offset, count);
}
static void bdrv_mirror_top_close(BlockDriverState *bs)
{
}
static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
/* Must be able to forward guest writes to the real image */
*nperm = 0;
if (perm & BLK_PERM_WRITE) {
*nperm |= BLK_PERM_WRITE;
}
*nshared = BLK_PERM_ALL;
}
/* Dummy node that provides consistent read to its users without requiring it
* from its backing file and that allows writes on the backing file chain. */
static BlockDriver bdrv_mirror_top = {
.format_name = "mirror_top",
.bdrv_co_preadv = bdrv_mirror_top_preadv,
.bdrv_co_pwritev = bdrv_mirror_top_pwritev,
.bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
.bdrv_co_flush = bdrv_mirror_top_flush,
.bdrv_co_get_block_status = bdrv_mirror_top_get_block_status,
.bdrv_close = bdrv_mirror_top_close,
.bdrv_child_perm = bdrv_mirror_top_child_perm,
};
static void mirror_start_job(const char *job_id, BlockDriverState *bs,
int creation_flags, BlockDriverState *target,
const char *replaces, int64_t speed,
@@ -960,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
void *opaque, Error **errp,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base,
bool auto_complete)
bool auto_complete, const char *filter_node_name)
{
MirrorBlockJob *s;
BlockDriverState *mirror_top_bs;
bool target_graph_mod;
bool target_is_backing;
Error *local_err = NULL;
int ret;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -979,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
s = block_job_create(job_id, driver, bs, speed, creation_flags,
cb, opaque, errp);
if (!s) {
/* In the case of active commit, add dummy driver to provide consistent
* reads on the top, while disabling it in the intermediate nodes, and make
* the backing chain writable. */
mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
BDRV_O_RDWR, errp);
if (mirror_top_bs == NULL) {
return;
}
mirror_top_bs->total_sectors = bs->total_sectors;
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
* it alive until block_job_create() even if bs has no parent. */
bdrv_ref(mirror_top_bs);
bdrv_drained_begin(bs);
bdrv_append(mirror_top_bs, bs, &local_err);
bdrv_drained_end(bs);
if (local_err) {
bdrv_unref(mirror_top_bs);
error_propagate(errp, local_err);
return;
}
s->target = blk_new();
blk_insert_bs(s->target, target);
/* Make sure that the source is not resized while the job is running */
s = block_job_create(job_id, driver, mirror_top_bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
creation_flags, cb, opaque, errp);
bdrv_unref(mirror_top_bs);
if (!s) {
goto fail;
}
s->source = bs;
s->mirror_top_bs = mirror_top_bs;
/* No resize for the target either; while the mirror is still running, a
* consistent read isn't necessarily possible. We could possibly allow
* writes and graph modifications, though it would likely defeat the
* purpose of a mirror, so leave them blocked for now.
*
* In the case of active commit, things look a bit different, though,
* because the target is an already populated backing file in active use.
* We can allow anything except resize there.*/
target_is_backing = bdrv_chain_contains(bs, target);
target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
(target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
BLK_PERM_WRITE_UNCHANGED |
(target_is_backing ? BLK_PERM_CONSISTENT_READ |
BLK_PERM_WRITE |
BLK_PERM_GRAPH_MOD : 0));
ret = blk_insert_bs(s->target, target, errp);
if (ret < 0) {
goto fail;
}
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
@@ -1009,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
return;
}
block_job_add_bdrv(&s->common, target);
/* Required permissions are already taken with blk_new() */
block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
/* In commit_active_start() all intermediate nodes disappear, so
* any jobs in them must be blocked */
if (bdrv_chain_contains(bs, target)) {
if (target_is_backing) {
BlockDriverState *iter;
for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block
* ourselves at s->base (if writes are blocked for a node, they are
* also blocked for its backing file). The other options would be a
* second filter driver above s->base (== target). */
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
errp);
if (ret < 0) {
goto fail;
}
}
}
trace_mirror_start(bs, s, opaque);
block_job_start(&s->common);
return;
fail:
if (s) {
g_free(s->replaces);
blk_unref(s->target);
block_job_unref(&s->common);
}
bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
}
void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1029,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap, Error **errp)
bool unmap, const char *filter_node_name, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
@@ -1043,18 +1251,18 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL, errp,
&mirror_job_driver, is_none_mode, base, false);
&mirror_job_driver, is_none_mode, base, false,
filter_node_name);
}
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
const char *filter_node_name,
BlockCompletionFunc *cb, void *opaque, Error **errp,
bool auto_complete)
{
int64_t length, base_length;
int orig_base_flags;
int ret;
Error *local_err = NULL;
orig_base_flags = bdrv_get_flags(base);
@@ -1063,35 +1271,11 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
return;
}
length = bdrv_getlength(bs);
if (length < 0) {
error_setg_errno(errp, -length,
"Unable to determine length of %s", bs->filename);
goto error_restore_flags;
}
base_length = bdrv_getlength(base);
if (base_length < 0) {
error_setg_errno(errp, -base_length,
"Unable to determine length of %s", base->filename);
goto error_restore_flags;
}
if (length > base_length) {
ret = bdrv_truncate(base, length);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Top image %s is larger than base image %s, and "
"resize of base image failed",
bs->filename, base->filename);
goto error_restore_flags;
}
}
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque, &local_err,
&commit_active_job_driver, false, base, auto_complete);
&commit_active_job_driver, false, base, auto_complete,
filter_node_name);
if (local_err) {
error_propagate(errp, local_err);
goto error_restore_flags;

View File

@@ -33,8 +33,9 @@
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
@@ -42,6 +43,7 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
qemu_coroutine_enter(s->recv_coroutine[i]);
}
}
BDRV_POLL_WHILE(bs, s->read_reply_co);
}
static void nbd_teardown_connection(BlockDriverState *bs)
@@ -56,7 +58,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
qio_channel_shutdown(client->ioc,
QIO_CHANNEL_SHUTDOWN_BOTH,
NULL);
nbd_recv_coroutines_enter_all(client);
nbd_recv_coroutines_enter_all(bs);
nbd_client_detach_aio_context(bs);
object_unref(OBJECT(client->sioc));
@@ -65,54 +67,43 @@ static void nbd_teardown_connection(BlockDriverState *bs)
client->ioc = NULL;
}
static void nbd_reply_ready(void *opaque)
static coroutine_fn void nbd_read_reply_entry(void *opaque)
{
BlockDriverState *bs = opaque;
NBDClientSession *s = nbd_get_client_session(bs);
NBDClientSession *s = opaque;
uint64_t i;
int ret;
if (!s->ioc) { /* Already closed */
return;
}
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
for (;;) {
assert(s->reply.handle == 0);
ret = nbd_receive_reply(s->ioc, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
break;
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes.
*/
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
break;
}
/* We're woken up by the recv_coroutine itself. Note that there
* is no race between yielding and reentering read_reply_co. This
* is because:
*
* - if recv_coroutine[i] runs on the same AioContext, it is only
* entered after we yield
*
* - if recv_coroutine[i] runs on a different AioContext, reentering
* read_reply_co happens through a bottom half, which can only
* run after we yield.
*/
aio_co_wake(s->recv_coroutine[i]);
qemu_coroutine_yield();
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i]);
return;
}
fail:
nbd_teardown_connection(bs);
}
static void nbd_restart_write(void *opaque)
{
BlockDriverState *bs = opaque;
qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
s->read_reply_co = NULL;
}
static int nbd_co_send_request(BlockDriverState *bs,
@@ -120,7 +111,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
QEMUIOVector *qiov)
{
NBDClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
int rc, ret, i;
qemu_co_mutex_lock(&s->send_mutex);
@@ -141,11 +131,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
return -EPIPE;
}
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, nbd_restart_write, NULL, bs);
if (qiov) {
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
@@ -160,9 +145,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
} else {
rc = nbd_send_request(s->ioc, request);
}
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, NULL, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
@@ -174,8 +156,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
/* Wait until we're woken up by nbd_read_reply_entry. */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle ||
@@ -201,7 +182,7 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight == MAX_NBD_REQUESTS) {
qemu_co_queue_wait(&s->free_sema);
qemu_co_queue_wait(&s->free_sema, NULL);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
@@ -209,13 +190,19 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
}
static void nbd_coroutine_end(NBDClientSession *s,
static void nbd_coroutine_end(BlockDriverState *bs,
NBDRequest *request)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_queue_next(&s->free_sema);
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
/* Kick the read_reply_co to get the next reply. */
if (s->read_reply_co) {
aio_co_wake(s->read_reply_co);
}
}
@@ -241,7 +228,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -271,7 +258,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -306,7 +293,7 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -331,7 +318,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -357,23 +344,23 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
nbd_get_client_session(bs)->sioc->fd,
false, NULL, NULL, NULL, NULL);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
false, nbd_reply_ready, NULL, NULL, bs);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
aio_co_schedule(new_context, client->read_reply_co);
}
void nbd_client_close(BlockDriverState *bs)
@@ -434,7 +421,7 @@ int nbd_client_init(BlockDriverState *bs,
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");

View File

@@ -25,7 +25,7 @@ typedef struct NBDClientSession {
CoMutex send_mutex;
CoQueue free_sema;
Coroutine *send_coroutine;
Coroutine *read_reply_co;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];

View File

@@ -537,8 +537,6 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
visit_complete(ov, &saddr_qdict);
visit_free(ov);
assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
qdict_put_obj(opts, "server", saddr_qdict);
if (s->export) {

View File

@@ -54,6 +54,7 @@ typedef struct NFSClient {
int events;
bool has_zero_init;
AioContext *aio_context;
QemuMutex mutex;
blkcnt_t st_blocks;
bool cache_used;
NFSServer *server;
@@ -108,12 +109,13 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
qdict_put(options, "path", qstring_from_str(uri->path));
for (i = 0; i < qp->n; i++) {
unsigned long long val;
if (!qp->p[i].value) {
error_setg(errp, "Value for NFS parameter expected: %s",
qp->p[i].name);
goto out;
}
if (parse_uint_full(qp->p[i].value, NULL, 0)) {
if (parse_uint_full(qp->p[i].value, &val, 0)) {
error_setg(errp, "Illegal value for NFS parameter: %s",
qp->p[i].name);
goto out;
@@ -190,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict *options,
static void nfs_process_read(void *arg);
static void nfs_process_write(void *arg);
/* Called with QemuMutex held. */
static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
@@ -207,15 +210,21 @@ static void nfs_set_events(NFSClient *client)
static void nfs_process_read(void *arg)
{
NFSClient *client = arg;
qemu_mutex_lock(&client->mutex);
nfs_service(client->context, POLLIN);
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
}
static void nfs_process_write(void *arg)
{
NFSClient *client = arg;
qemu_mutex_lock(&client->mutex);
nfs_service(client->context, POLLOUT);
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
}
static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -230,10 +239,12 @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
static void nfs_co_generic_bh_cb(void *opaque)
{
NFSRPC *task = opaque;
task->complete = 1;
qemu_coroutine_enter(task->co);
aio_co_wake(task->co);
}
/* Called (via nfs_service) with QemuMutex held. */
static void
nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
@@ -255,9 +266,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
nfs_co_generic_bh_cb, task);
}
static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *iov,
int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
@@ -265,14 +276,15 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
nfs_co_init_task(bs, &task);
task.iov = iov;
qemu_mutex_lock(&client->mutex);
if (nfs_pread_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
nfs_co_generic_cb, &task) != 0) {
offset, bytes, nfs_co_generic_cb, &task) != 0) {
qemu_mutex_unlock(&client->mutex);
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
@@ -289,39 +301,50 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
return 0;
}
static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *iov,
int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
char *buf = NULL;
bool my_buffer = false;
nfs_co_init_task(bs, &task);
buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
if (nb_sectors && buf == NULL) {
return -ENOMEM;
if (iov->niov != 1) {
buf = g_try_malloc(bytes);
if (bytes && buf == NULL) {
return -ENOMEM;
}
qemu_iovec_to_buf(iov, 0, buf, bytes);
my_buffer = true;
} else {
buf = iov->iov[0].iov_base;
}
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
qemu_mutex_lock(&client->mutex);
if (nfs_pwrite_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
buf, nfs_co_generic_cb, &task) != 0) {
g_free(buf);
offset, bytes, buf,
nfs_co_generic_cb, &task) != 0) {
qemu_mutex_unlock(&client->mutex);
if (my_buffer) {
g_free(buf);
}
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
g_free(buf);
if (my_buffer) {
g_free(buf);
}
if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
if (task.ret != bytes) {
return task.ret < 0 ? task.ret : -EIO;
}
@@ -335,12 +358,15 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
nfs_co_init_task(bs, &task);
qemu_mutex_lock(&client->mutex);
if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
qemu_mutex_unlock(&client->mutex);
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
@@ -358,27 +384,27 @@ static QemuOptsList runtime_opts = {
.help = "Path of the image on the host",
},
{
.name = "uid",
.name = "user",
.type = QEMU_OPT_NUMBER,
.help = "UID value to use when talking to the server",
},
{
.name = "gid",
.name = "group",
.type = QEMU_OPT_NUMBER,
.help = "GID value to use when talking to the server",
},
{
.name = "tcp-syncnt",
.name = "tcp-syn-count",
.type = QEMU_OPT_NUMBER,
.help = "Number of SYNs to send during the session establish",
},
{
.name = "readahead",
.name = "readahead-size",
.type = QEMU_OPT_NUMBER,
.help = "Set the readahead size in bytes",
},
{
.name = "pagecache",
.name = "page-cache-size",
.type = QEMU_OPT_NUMBER,
.help = "Set the pagecache size in bytes",
},
@@ -426,6 +452,7 @@ static void nfs_file_close(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
nfs_client_close(client);
qemu_mutex_destroy(&client->mutex);
}
static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -507,29 +534,29 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
goto fail;
}
if (qemu_opt_get(opts, "uid")) {
client->uid = qemu_opt_get_number(opts, "uid", 0);
if (qemu_opt_get(opts, "user")) {
client->uid = qemu_opt_get_number(opts, "user", 0);
nfs_set_uid(client->context, client->uid);
}
if (qemu_opt_get(opts, "gid")) {
client->gid = qemu_opt_get_number(opts, "gid", 0);
if (qemu_opt_get(opts, "group")) {
client->gid = qemu_opt_get_number(opts, "group", 0);
nfs_set_gid(client->context, client->gid);
}
if (qemu_opt_get(opts, "tcp-syncnt")) {
client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0);
if (qemu_opt_get(opts, "tcp-syn-count")) {
client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syn-count", 0);
nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
}
#ifdef LIBNFS_FEATURE_READAHEAD
if (qemu_opt_get(opts, "readahead")) {
if (qemu_opt_get(opts, "readahead-size")) {
if (open_flags & BDRV_O_NOCACHE) {
error_setg(errp, "Cannot enable NFS readahead "
"if cache.direct = on");
goto fail;
}
client->readahead = qemu_opt_get_number(opts, "readahead", 0);
client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
error_report("NFS Warning: Truncating NFS readahead "
"size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
@@ -544,13 +571,13 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
#endif
#ifdef LIBNFS_FEATURE_PAGECACHE
if (qemu_opt_get(opts, "pagecache")) {
if (qemu_opt_get(opts, "page-cache-size")) {
if (open_flags & BDRV_O_NOCACHE) {
error_setg(errp, "Cannot enable NFS pagecache "
"if cache.direct = on");
goto fail;
}
client->pagecache = qemu_opt_get_number(opts, "pagecache", 0);
client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
error_report("NFS Warning: Truncating NFS pagecache "
"size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
@@ -633,6 +660,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
if (ret < 0) {
return ret;
}
qemu_mutex_init(&client->mutex);
bs->total_sectors = ret;
ret = 0;
return ret;
@@ -688,6 +716,7 @@ static int nfs_has_zero_init(BlockDriverState *bs)
return client->has_zero_init;
}
/* Called (via nfs_service) with QemuMutex held. */
static void
nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
@@ -797,28 +826,26 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
ov = qobject_output_visitor_new(&server_qdict);
visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
visit_complete(ov, &server_qdict);
assert(qobject_type(server_qdict) == QTYPE_QDICT);
qdict_put_obj(opts, "server", server_qdict);
qdict_put(opts, "path", qstring_from_str(client->path));
if (client->uid) {
qdict_put(opts, "uid", qint_from_int(client->uid));
qdict_put(opts, "user", qint_from_int(client->uid));
}
if (client->gid) {
qdict_put(opts, "gid", qint_from_int(client->gid));
qdict_put(opts, "group", qint_from_int(client->gid));
}
if (client->tcp_syncnt) {
qdict_put(opts, "tcp-syncnt",
qint_from_int(client->tcp_syncnt));
qdict_put(opts, "tcp-syn-cnt",
qint_from_int(client->tcp_syncnt));
}
if (client->readahead) {
qdict_put(opts, "readahead",
qint_from_int(client->readahead));
qdict_put(opts, "readahead-size",
qint_from_int(client->readahead));
}
if (client->pagecache) {
qdict_put(opts, "pagecache",
qint_from_int(client->pagecache));
qdict_put(opts, "page-cache-size",
qint_from_int(client->pagecache));
}
if (client->debug) {
qdict_put(opts, "debug", qint_from_int(client->debug));
@@ -855,8 +882,8 @@ static BlockDriver bdrv_nfs = {
.bdrv_create = nfs_file_create,
.bdrv_reopen_prepare = nfs_reopen_prepare,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
.bdrv_co_preadv = nfs_co_preadv,
.bdrv_co_pwritev = nfs_co_pwritev,
.bdrv_co_flush_to_disk = nfs_co_flush,
.bdrv_detach_aio_context = nfs_detach_aio_context,

View File

@@ -215,7 +215,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
s->data_end << BDRV_SECTOR_BITS,
space << BDRV_SECTOR_BITS, 0);
} else {
ret = bdrv_truncate(bs->file->bs,
ret = bdrv_truncate(bs->file,
(s->data_end + space) << BDRV_SECTOR_BITS);
}
if (ret < 0) {
@@ -449,7 +449,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
size - res->image_end_offset);
res->leaks += count;
if (fix & BDRV_FIX_LEAKS) {
ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
ret = bdrv_truncate(bs->file, res->image_end_offset);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
}
file = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (file == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -581,6 +582,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
char *buf;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
if (ret < 0) {
goto fail;
@@ -681,7 +688,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
goto fail_options;
}
if (!bdrv_has_zero_init(bs->file->bs) ||
bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) {
s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
}
@@ -724,7 +731,7 @@ static void parallels_close(BlockDriverState *bs)
}
if (bs->open_flags & BDRV_O_RDWR) {
bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
}
g_free(s->bat_dirty_bmap);
@@ -756,6 +763,7 @@ static BlockDriver bdrv_parallels = {
.bdrv_probe = parallels_probe,
.bdrv_open = parallels_open,
.bdrv_close = parallels_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_get_block_status = parallels_co_get_block_status,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_flush_to_os = parallels_co_flush_to_os,

View File

@@ -237,8 +237,8 @@ void bdrv_query_image_info(BlockDriverState *bs,
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Can't get size of device '%s'",
bdrv_get_device_name(bs));
error_setg_errno(errp, -size, "Can't get image size '%s'",
bs->exact_filename);
goto out;
}
@@ -357,10 +357,6 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
qapi_free_BlockInfo(info);
}
static BlockStats *bdrv_query_stats(BlockBackend *blk,
const BlockDriverState *bs,
bool query_backing);
static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
{
BlockAcctStats *stats = blk_get_stats(blk);
@@ -428,9 +424,18 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
}
}
static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
bool query_backing)
{
BlockStats *s = NULL;
s = g_malloc0(sizeof(*s));
s->stats = g_malloc0(sizeof(*s->stats));
if (!bs) {
return s;
}
if (bdrv_get_node_name(bs)[0]) {
s->has_node_name = true;
s->node_name = g_strdup(bdrv_get_node_name(bs));
@@ -440,32 +445,12 @@ static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
if (bs->file) {
s->has_parent = true;
s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
s->parent = bdrv_query_bds_stats(bs->file->bs, query_backing);
}
if (query_backing && bs->backing) {
s->has_backing = true;
s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
}
}
static BlockStats *bdrv_query_stats(BlockBackend *blk,
const BlockDriverState *bs,
bool query_backing)
{
BlockStats *s;
s = g_malloc0(sizeof(*s));
s->stats = g_malloc0(sizeof(*s->stats));
if (blk) {
s->has_device = true;
s->device = g_strdup(blk_name(blk));
bdrv_query_blk_stats(s->stats, blk);
}
if (bs) {
bdrv_query_bds_stats(s, bs, query_backing);
s->backing = bdrv_query_bds_stats(bs->backing->bs, query_backing);
}
return s;
@@ -494,42 +479,44 @@ BlockInfoList *qmp_query_block(Error **errp)
return head;
}
static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
bool query_nodes)
{
if (query_nodes) {
*bs = bdrv_next_node(*bs);
return !!*bs;
}
*blk = blk_next(*blk);
*bs = *blk ? blk_bs(*blk) : NULL;
return !!*blk;
}
BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
bool query_nodes,
Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
BlockBackend *blk;
BlockDriverState *bs;
/* Just to be safe if query_nodes is not always initialized */
query_nodes = has_query_nodes && query_nodes;
if (has_query_nodes && query_nodes) {
for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
AioContext *ctx = bdrv_get_aio_context(bs);
while (next_query_bds(&blk, &bs, query_nodes)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
AioContext *ctx = blk ? blk_get_aio_context(blk)
: bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
info->value = bdrv_query_bds_stats(bs, false);
aio_context_release(ctx);
aio_context_acquire(ctx);
info->value = bdrv_query_stats(blk, bs, !query_nodes);
aio_context_release(ctx);
*p_next = info;
p_next = &info->next;
}
} else {
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
AioContext *ctx = blk_get_aio_context(blk);
BlockStats *s;
*p_next = info;
p_next = &info->next;
aio_context_acquire(ctx);
s = bdrv_query_bds_stats(blk_bs(blk), true);
s->has_device = true;
s->device = g_strdup(blk_name(blk));
bdrv_query_blk_stats(s->stats, blk);
aio_context_release(ctx);
info->value = s;
*p_next = info;
p_next = &info->next;
}
}
return head;
@@ -695,7 +682,6 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
visit_complete(v, &obj);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
qobject_decref(obj);

View File

@@ -106,6 +106,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
QCowHeader header;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
goto fail;
@@ -467,7 +473,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (bs->encrypted &&
@@ -817,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
qcow_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (qcow_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -909,7 +916,7 @@ static int qcow_make_empty(BlockDriverState *bs)
if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;
@@ -1046,6 +1053,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_probe = qcow_probe,
.bdrv_open = qcow_open,
.bdrv_close = qcow_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,

View File

@@ -932,9 +932,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
if (bytes == 0) {
/* Wait for the dependency to complete. We need to recheck
* the free/allocated clusters when we continue. */
qemu_co_mutex_unlock(&s->lock);
qemu_co_queue_wait(&old_alloc->dependent_requests);
qemu_co_mutex_lock(&s->lock);
qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
return -EAGAIN;
}
}

View File

@@ -83,6 +83,16 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
/*********************************************************/
/* refcount handling */
static void update_max_refcount_table_index(BDRVQcow2State *s)
{
unsigned i = s->refcount_table_size - 1;
while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
i--;
}
/* Set s->max_refcount_table_index to the index of the last used entry */
s->max_refcount_table_index = i;
}
int qcow2_refcount_init(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
@@ -111,6 +121,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
}
for(i = 0; i < s->refcount_table_size; i++)
be64_to_cpus(&s->refcount_table[i]);
update_max_refcount_table_index(s);
}
return 0;
fail:
@@ -439,6 +450,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
s->refcount_table[refcount_table_index] = new_block;
/* If there's a hole in s->refcount_table then it can happen
* that refcount_table_index < s->max_refcount_table_index */
s->max_refcount_table_index =
MAX(s->max_refcount_table_index, refcount_table_index);
/* The new refcount block may be where the caller intended to put its
* data, so let it restart the search. */
@@ -580,6 +595,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table = new_table;
s->refcount_table_size = table_size;
s->refcount_table_offset = table_offset;
update_max_refcount_table_index(s);
/* Free old table. */
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
@@ -1718,7 +1734,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
goto resize_fail;
}
ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
ret = bdrv_truncate(bs->file, offset + s->cluster_size);
if (ret < 0) {
goto resize_fail;
}
@@ -2171,6 +2187,7 @@ write_refblocks:
s->refcount_table = on_disk_reftable;
s->refcount_table_offset = reftable_offset;
s->refcount_table_size = reftable_size;
update_max_refcount_table_index(s);
return 0;
@@ -2383,7 +2400,11 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
}
if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
for (i = 0; i < s->refcount_table_size; i++) {
unsigned last_entry = s->max_refcount_table_index;
assert(last_entry < s->refcount_table_size);
assert(last_entry + 1 == s->refcount_table_size ||
(s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
for (i = 0; i <= last_entry; i++) {
if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
s->cluster_size)) {
@@ -2871,6 +2892,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
/* Now update the rest of the in-memory information */
old_reftable = s->refcount_table;
s->refcount_table = new_reftable;
update_max_refcount_table_index(s);
s->refcount_bits = 1 << refcount_order;
s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);

View File

@@ -814,8 +814,8 @@ static int qcow2_update_options(BlockDriverState *bs, QDict *options,
return ret;
}
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQcow2State *s = bs->opaque;
unsigned int len, i;
@@ -1205,6 +1205,18 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
return qcow2_do_open(bs, options, flags, errp);
}
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
@@ -1785,7 +1797,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
options = qdict_clone_shallow(bs->options);
flags &= ~BDRV_O_INACTIVE;
ret = qcow2_open(bs, options, flags, &local_err);
ret = qcow2_do_open(bs, options, flags, &local_err);
QDECREF(options);
if (local_err) {
error_propagate(errp, local_err);
@@ -2190,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -2254,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
blk = blk_new_open(filename, NULL, options,
BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2570,7 +2584,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file->bs);
return bdrv_truncate(bs->file->bs, cluster_offset);
return bdrv_truncate(bs->file, cluster_offset);
}
buf = qemu_blockalign(bs, s->cluster_size);
@@ -2743,6 +2757,7 @@ static int make_completely_empty(BlockDriverState *bs)
s->refcount_table_offset = s->cluster_size;
s->refcount_table_size = s->cluster_size / sizeof(uint64_t);
s->max_refcount_table_index = 0;
g_free(s->refcount_table);
s->refcount_table = new_reftable;
@@ -2783,7 +2798,7 @@ static int make_completely_empty(BlockDriverState *bs)
goto fail;
}
ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -3100,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
uint64_t cluster_size = s->cluster_size;
bool encrypt;
int refcount_bits = s->refcount_bits;
Error *local_err = NULL;
int ret;
QemuOptDesc *desc = opts->list->desc;
Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3249,7 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (new_size) {
ret = bdrv_truncate(bs, new_size);
BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
blk_unref(blk);
return ret;
}
ret = blk_truncate(blk, new_size);
blk_unref(blk);
if (ret < 0) {
return ret;
}
@@ -3386,6 +3411,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_reopen_commit = qcow2_reopen_commit,
.bdrv_reopen_abort = qcow2_reopen_abort,
.bdrv_join_options = qcow2_join_options,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,

View File

@@ -251,6 +251,7 @@ typedef struct BDRVQcow2State {
uint64_t *refcount_table;
uint64_t refcount_table_offset;
uint32_t refcount_table_size;
uint32_t max_refcount_table_index; /* Last used entry in refcount_table */
uint64_t free_cluster_index;
uint64_t free_byte_offset;

View File

@@ -83,6 +83,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
unsigned int index;
unsigned int n;
qed_acquire(s);
if (ret) {
goto out;
}
@@ -109,6 +110,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
out:
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
qed_release(s);
g_free(find_cluster_cb);
}

View File

@@ -31,6 +31,7 @@ static void qed_read_table_cb(void *opaque, int ret)
{
QEDReadTableCB *read_table_cb = opaque;
QEDTable *table = read_table_cb->table;
BDRVQEDState *s = read_table_cb->s;
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
int i;
@@ -40,13 +41,15 @@ static void qed_read_table_cb(void *opaque, int ret)
}
/* Byteswap offsets */
qed_acquire(s);
for (i = 0; i < noffsets; i++) {
table->offsets[i] = le64_to_cpu(table->offsets[i]);
}
qed_release(s);
out:
/* Completion */
trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
trace_qed_read_table_cb(s, read_table_cb->table, ret);
gencb_complete(&read_table_cb->gencb, ret);
}
@@ -84,8 +87,9 @@ typedef struct {
static void qed_write_table_cb(void *opaque, int ret)
{
QEDWriteTableCB *write_table_cb = opaque;
BDRVQEDState *s = write_table_cb->s;
trace_qed_write_table_cb(write_table_cb->s,
trace_qed_write_table_cb(s,
write_table_cb->orig_table,
write_table_cb->flush,
ret);
@@ -97,8 +101,10 @@ static void qed_write_table_cb(void *opaque, int ret)
if (write_table_cb->flush) {
/* We still need to flush first */
write_table_cb->flush = false;
qed_acquire(s);
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
write_table_cb);
qed_release(s);
return;
}
@@ -213,6 +219,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
CachedL2Table *l2_table = request->l2_table;
uint64_t l2_offset = read_l2_table_cb->l2_offset;
qed_acquire(s);
if (ret) {
/* can't trust loaded L2 table anymore */
qed_unref_l2_cache_entry(l2_table);
@@ -228,6 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(request->l2_table != NULL);
}
qed_release(s);
gencb_complete(&read_l2_table_cb->gencb, ret);
}

View File

@@ -273,7 +273,19 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
return l2_table;
}
static void qed_aio_next_io(void *opaque, int ret);
static void qed_aio_next_io(QEDAIOCB *acb, int ret);
static void qed_aio_start_io(QEDAIOCB *acb)
{
qed_aio_next_io(acb, 0);
}
static void qed_aio_next_io_cb(void *opaque, int ret)
{
QEDAIOCB *acb = opaque;
qed_aio_next_io(acb, ret);
}
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
@@ -292,7 +304,7 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
}
}
@@ -333,10 +345,22 @@ static void qed_need_check_timer_cb(void *opaque)
trace_qed_need_check_timer_cb(s);
qed_acquire(s);
qed_plug_allocating_write_reqs(s);
/* Ensure writes are on disk before clearing flag */
bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
qed_release(s);
}
void qed_acquire(BDRVQEDState *s)
{
aio_context_acquire(bdrv_get_aio_context(s->bs));
}
void qed_release(BDRVQEDState *s)
{
aio_context_release(bdrv_get_aio_context(s->bs));
}
static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -391,8 +415,8 @@ static void bdrv_qed_drain(BlockDriverState *bs)
}
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQEDState *s = bs->opaque;
QEDHeader le_header;
@@ -526,6 +550,18 @@ out:
return ret;
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
return bdrv_qed_do_open(bs, options, flags, errp);
}
static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQEDState *s = bs->opaque;
@@ -589,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -721,7 +758,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
}
if (cb->co) {
qemu_coroutine_enter(cb->co);
aio_co_wake(cb->co);
}
}
@@ -918,6 +955,7 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
static void qed_aio_complete_bh(void *opaque)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
BlockCompletionFunc *cb = acb->common.cb;
void *user_opaque = acb->common.opaque;
int ret = acb->bh_ret;
@@ -925,7 +963,9 @@ static void qed_aio_complete_bh(void *opaque)
qemu_aio_unref(acb);
/* Invoke callback */
qed_acquire(s);
cb(user_opaque, ret);
qed_release(s);
}
static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -959,7 +999,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
} else if (s->header.features & QED_F_NEED_CHECK) {
qed_start_need_check_timer(s);
}
@@ -984,7 +1024,7 @@ static void qed_commit_l2_update(void *opaque, int ret)
acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(acb->request.l2_table != NULL);
qed_aio_next_io(opaque, ret);
qed_aio_next_io(acb, ret);
}
/**
@@ -1032,11 +1072,11 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
if (need_alloc) {
/* Write out the whole new L2 table */
qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
qed_aio_write_l1_update, acb);
qed_aio_write_l1_update, acb);
} else {
/* Write out only the updated part of the L2 table */
qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
qed_aio_next_io, acb);
qed_aio_next_io_cb, acb);
}
return;
@@ -1088,7 +1128,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
next_fn = qed_aio_next_io;
next_fn = qed_aio_next_io_cb;
} else {
if (s->bs->backing) {
next_fn = qed_aio_write_flush_before_l2_update;
@@ -1201,7 +1241,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
if (acb->flags & QED_AIOCB_ZERO) {
/* Skip ahead if the clusters are already zero */
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return;
}
@@ -1321,18 +1361,18 @@ static void qed_aio_read_data(void *opaque, int ret,
/* Handle zero cluster and backing file reads */
if (ret == QED_CLUSTER_ZERO) {
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return;
} else if (ret != QED_CLUSTER_FOUND) {
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
&acb->backing_qiov, qed_aio_next_io, acb);
&acb->backing_qiov, qed_aio_next_io_cb, acb);
return;
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
qed_aio_next_io_cb, acb);
return;
err:
@@ -1342,9 +1382,8 @@ err:
/**
* Begin next I/O or complete the request
*/
static void qed_aio_next_io(void *opaque, int ret)
static void qed_aio_next_io(QEDAIOCB *acb, int ret)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
qed_aio_write_data : qed_aio_read_data;
@@ -1400,7 +1439,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
/* Start request */
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return &acb->common;
}
@@ -1436,7 +1475,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
cb->done = true;
cb->ret = ret;
if (cb->co) {
qemu_coroutine_enter(cb->co);
aio_co_wake(cb->co);
}
}
@@ -1603,7 +1642,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_prepend(errp, "Could not reopen qed layer: ");
@@ -1666,6 +1705,7 @@ static BlockDriver bdrv_qed = {
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,

View File

@@ -198,6 +198,9 @@ enum {
*/
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
void qed_acquire(BDRVQEDState *s);
void qed_release(BDRVQEDState *s);
/**
* Generic callback for chaining async callbacks
*/

View File

@@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
/* We can safely add the child now */
bdrv_ref(child_bs);
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
if (child == NULL) {
s->next_child_index--;
bdrv_unref(child_bs);
goto out;
}
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
s->children[s->num_children++] = child;
out:
bdrv_drained_end(bs);
}
@@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
.bdrv_child_perm = bdrv_filter_default_perms,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};

View File

@@ -341,7 +341,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
s->size = offset;
offset += s->offset;
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static int raw_media_changed(BlockDriverState *bs)
@@ -384,6 +384,12 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
BDRVRawState *s = bs->opaque;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->sg = bs->file->bs->sg;
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
@@ -461,6 +467,7 @@ BlockDriver bdrv_raw = {
.bdrv_reopen_abort = &raw_reopen_abort,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_create = &raw_create,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,

View File

@@ -62,6 +62,13 @@
#define RBD_MAX_SNAP_NAME_SIZE 128
#define RBD_MAX_SNAPS 100
/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
#ifdef LIBRBD_SUPPORTS_IOVEC
#define LIBRBD_USE_IOVEC 1
#else
#define LIBRBD_USE_IOVEC 0
#endif
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
@@ -310,6 +317,17 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
return ret;
}
static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
{
if (LIBRBD_USE_IOVEC) {
RBDAIOCB *acb = rcb->acb;
iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
acb->qiov->size - offs);
} else {
memset(rcb->buf + offs, 0, rcb->size - offs);
}
}
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
@@ -426,11 +444,11 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
}
} else {
if (r < 0) {
memset(rcb->buf, 0, rcb->size);
qemu_rbd_memset(rcb, 0);
acb->ret = r;
acb->error = 1;
} else if (r < rcb->size) {
memset(rcb->buf + r, 0, rcb->size - r);
qemu_rbd_memset(rcb, r);
if (!acb->error) {
acb->ret = rcb->size;
}
@@ -441,10 +459,13 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
g_free(rcb);
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
if (!LIBRBD_USE_IOVEC) {
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_aio_unref(acb);
@@ -655,7 +676,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
char *buf;
int r;
BDRVRBDState *s = bs->opaque;
@@ -664,27 +684,29 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->cmd = cmd;
acb->qiov = qiov;
assert(!qiov || qiov->size == size);
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_try_blockalign(bs, qiov->size);
if (acb->bounce == NULL) {
goto failed;
rcb = g_new(RADOSCB, 1);
if (!LIBRBD_USE_IOVEC) {
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_try_blockalign(bs, qiov->size);
if (acb->bounce == NULL) {
goto failed;
}
}
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
}
rcb->buf = acb->bounce;
}
acb->ret = 0;
acb->error = 0;
acb->s = s;
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
}
buf = acb->bounce;
rcb = g_new(RADOSCB, 1);
rcb->acb = acb;
rcb->buf = buf;
rcb->s = acb->s;
rcb->size = size;
r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -694,10 +716,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
switch (cmd) {
case RBD_AIO_WRITE:
r = rbd_aio_write(s->image, off, size, buf, c);
#ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
#else
r = rbd_aio_write(s->image, off, size, rcb->buf, c);
#endif
break;
case RBD_AIO_READ:
r = rbd_aio_read(s->image, off, size, buf, c);
#ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
#else
r = rbd_aio_read(s->image, off, size, rcb->buf, c);
#endif
break;
case RBD_AIO_DISCARD:
r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -712,14 +742,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
if (r < 0) {
goto failed_completion;
}
return &acb->common;
failed_completion:
rbd_aio_release(c);
failed:
g_free(rcb);
qemu_vfree(acb->bounce);
if (!LIBRBD_USE_IOVEC) {
qemu_vfree(acb->bounce);
}
qemu_aio_unref(acb);
return NULL;
}

View File

@@ -86,6 +86,12 @@ static int replication_open(BlockDriverState *bs, QDict *options,
const char *mode;
const char *top_id;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = -EINVAL;
opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -638,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
s->replication_state = BLOCK_REPLICATION_FAILOVER;
commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
replication_done, bs, errp, true);
NULL, replication_done, bs, errp, true);
break;
default:
aio_context_release(aio_context);
@@ -654,6 +660,7 @@ BlockDriver bdrv_replication = {
.bdrv_open = replication_open,
.bdrv_close = replication_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = replication_getlength,
.bdrv_co_readv = replication_co_readv,

View File

@@ -306,6 +306,7 @@ static inline size_t count_data_objs(const struct SheepdogInode *inode)
} while (0)
typedef struct SheepdogAIOCB SheepdogAIOCB;
typedef struct BDRVSheepdogState BDRVSheepdogState;
typedef struct AIOReq {
SheepdogAIOCB *aiocb;
@@ -334,7 +335,7 @@ enum AIOCBState {
|| y->max_affect_data_idx < x->min_affect_data_idx))
struct SheepdogAIOCB {
BlockAIOCB common;
BDRVSheepdogState *s;
QEMUIOVector *qiov;
@@ -345,9 +346,6 @@ struct SheepdogAIOCB {
enum AIOCBState aiocb_type;
Coroutine *coroutine;
void (*aio_done_func)(SheepdogAIOCB *);
bool cancelable;
int nr_pending;
uint32_t min_affect_data_idx;
@@ -365,7 +363,7 @@ struct SheepdogAIOCB {
QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
};
typedef struct BDRVSheepdogState {
struct BDRVSheepdogState {
BlockDriverState *bs;
AioContext *aio_context;
@@ -392,7 +390,7 @@ typedef struct BDRVSheepdogState {
CoQueue overlapping_queue;
QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
} BDRVSheepdogState;
};
typedef struct BDRVSheepdogReopenState {
int fd;
@@ -450,14 +448,13 @@ static const char * sd_strerror(int err)
*
* 1. In sd_co_rw_vector, we send the I/O requests to the server and
* link the requests to the inflight_list in the
* BDRVSheepdogState. The function exits without waiting for
* BDRVSheepdogState. The function yields while waiting for
* receiving the response.
*
* 2. We receive the response in aio_read_response, the fd handler to
* the sheepdog connection. If metadata update is needed, we send
* the write request to the vdi object in sd_write_done, the write
* completion function. We switch back to sd_co_readv/writev after
* all the requests belonging to the AIOCB are finished.
* the sheepdog connection. We switch back to sd_co_readv/sd_writev
* after all the requests belonging to the AIOCB are finished. If
* needed, sd_co_writev will send another requests for the vdi object.
*/
static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
@@ -482,94 +479,34 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
return aio_req;
}
static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
{
SheepdogAIOCB *acb = aio_req->aiocb;
SheepdogAIOCB *cb;
acb->cancelable = false;
QLIST_REMOVE(aio_req, aio_siblings);
g_free(aio_req);
acb->nr_pending--;
}
static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
{
qemu_coroutine_enter(acb->coroutine);
qemu_aio_unref(acb);
}
/*
* Check whether the specified acb can be canceled
*
* We can cancel aio when any request belonging to the acb is:
* - Not processed by the sheepdog server.
* - Not linked to the inflight queue.
*/
static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
{
BDRVSheepdogState *s = acb->common.bs->opaque;
AIOReq *aioreq;
if (!acb->cancelable) {
return false;
}
QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) {
if (aioreq->aiocb == acb) {
return false;
retry:
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
if (AIOCBOverlapping(acb, cb)) {
qemu_co_queue_wait(&s->overlapping_queue, NULL);
goto retry;
}
}
return true;
}
static void sd_aio_cancel(BlockAIOCB *blockacb)
static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
QEMUIOVector *qiov, int64_t sector_num, int nb_sectors,
int type)
{
SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
BDRVSheepdogState *s = acb->common.bs->opaque;
AIOReq *aioreq, *next;
if (sd_acb_cancelable(acb)) {
/* Remove outstanding requests from failed queue. */
QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
next) {
if (aioreq->aiocb == acb) {
free_aio_req(s, aioreq);
}
}
assert(acb->nr_pending == 0);
if (acb->common.cb) {
acb->common.cb(acb->common.opaque, -ECANCELED);
}
sd_finish_aiocb(acb);
}
}
static const AIOCBInfo sd_aiocb_info = {
.aiocb_size = sizeof(SheepdogAIOCB),
.cancel_async = sd_aio_cancel,
};
static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors)
{
SheepdogAIOCB *acb;
uint32_t object_size;
BDRVSheepdogState *s = bs->opaque;
object_size = (UINT32_C(1) << s->inode.block_size_shift);
acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
acb->s = s;
acb->qiov = qiov;
acb->sector_num = sector_num;
acb->nb_sectors = nb_sectors;
acb->aio_done_func = NULL;
acb->cancelable = true;
acb->coroutine = qemu_coroutine_self();
acb->ret = 0;
acb->nr_pending = 0;
@@ -580,8 +517,14 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
acb->min_dirty_data_idx = UINT32_MAX;
acb->max_dirty_data_idx = 0;
acb->aiocb_type = type;
return acb;
if (type == AIOCB_FLUSH_CACHE) {
return;
}
wait_for_overlapping_aiocb(s, acb);
QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
}
/* Return -EIO in case of error, file descriptor on success */
@@ -632,13 +575,6 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
static void restart_co_req(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co);
}
typedef struct SheepdogReqCo {
int sockfd;
BlockDriverState *bs;
@@ -649,12 +585,19 @@ typedef struct SheepdogReqCo {
unsigned int *rlen;
int ret;
bool finished;
Coroutine *co;
} SheepdogReqCo;
static void restart_co_req(void *opaque)
{
SheepdogReqCo *srco = opaque;
aio_co_wake(srco->co);
}
static coroutine_fn void do_co_req(void *opaque)
{
int ret;
Coroutine *co;
SheepdogReqCo *srco = opaque;
int sockfd = srco->sockfd;
SheepdogReq *hdr = srco->hdr;
@@ -662,9 +605,9 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *wlen = srco->wlen;
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
srco->co = qemu_coroutine_self();
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, restart_co_req, NULL, co);
NULL, restart_co_req, NULL, srco);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
@@ -672,7 +615,7 @@ static coroutine_fn void do_co_req(void *opaque)
}
aio_set_fd_handler(srco->aio_context, sockfd, false,
restart_co_req, NULL, NULL, co);
restart_co_req, NULL, NULL, srco);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -700,6 +643,7 @@ out:
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, NULL, NULL, NULL);
srco->co = NULL;
srco->ret = ret;
srco->finished = true;
if (srco->bs) {
@@ -797,7 +741,6 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
while (!QLIST_EMPTY(&s->failed_aio_head)) {
aio_req = QLIST_FIRST(&s->failed_aio_head);
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
resend_aioreq(s, aio_req);
}
}
@@ -840,9 +783,6 @@ static void coroutine_fn aio_read_response(void *opaque)
switch (acb->aiocb_type) {
case AIOCB_WRITE_UDATA:
/* this coroutine context is no longer suitable for co_recv
* because we may send data to update vdi objects */
s->co_recv = NULL;
if (!is_data_obj(aio_req->oid)) {
break;
}
@@ -890,6 +830,12 @@ static void coroutine_fn aio_read_response(void *opaque)
}
}
/* No more data for this aio_req (reload_inode below uses its own file
* descriptor handler which doesn't use co_recv).
*/
s->co_recv = NULL;
QLIST_REMOVE(aio_req, aio_siblings);
switch (rsp.result) {
case SD_RES_SUCCESS:
break;
@@ -907,26 +853,26 @@ static void coroutine_fn aio_read_response(void *opaque)
aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id);
}
resend_aioreq(s, aio_req);
goto out;
return;
default:
acb->ret = -EIO;
error_report("%s", sd_strerror(rsp.result));
break;
}
free_aio_req(s, aio_req);
if (!acb->nr_pending) {
g_free(aio_req);
if (!--acb->nr_pending) {
/*
* We've finished all requests which belong to the AIOCB, so
* we can switch back to sd_co_readv/writev now.
*/
acb->aio_done_func(acb);
aio_co_wake(acb->coroutine);
}
out:
s->co_recv = NULL;
return;
err:
s->co_recv = NULL;
reconnect_to_sdog(opaque);
}
@@ -938,14 +884,14 @@ static void co_read_response(void *opaque)
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
qemu_coroutine_enter(s->co_recv);
aio_co_wake(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
qemu_coroutine_enter(s->co_send);
aio_co_wake(s->co_send);
}
/*
@@ -1176,6 +1122,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
uint64_t old_oid = aio_req->base_oid;
bool create = aio_req->create;
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
if (!nr_copies) {
error_report("bug");
}
@@ -1661,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp)
int ret;
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (blk == NULL) {
ret = -EIO;
goto out_with_err_set;
@@ -2025,11 +1973,10 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
/*
* This function is called after writing data objects. If we need to
* update metadata, this sends a write request to the vdi object.
* Otherwise, this switches back to sd_co_readv/writev.
*/
static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
{
BDRVSheepdogState *s = acb->common.bs->opaque;
BDRVSheepdogState *s = acb->s;
struct iovec iov;
AIOReq *aio_req;
uint32_t offset, data_len, mn, mx;
@@ -2038,6 +1985,7 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
mx = acb->max_dirty_data_idx;
if (mn <= mx) {
/* we need to update the vdi object. */
++acb->nr_pending;
offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
mn * sizeof(s->inode.data_vdi_id[0]);
data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
@@ -2049,15 +1997,11 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
iov.iov_len = sizeof(s->inode);
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
data_len, offset, 0, false, 0, offset);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
acb->aio_done_func = sd_finish_aiocb;
acb->aiocb_type = AIOCB_WRITE_UDATA;
return;
if (--acb->nr_pending) {
qemu_coroutine_yield();
}
}
sd_finish_aiocb(acb);
}
/* Delete current working VDI on the snapshot chain */
@@ -2169,16 +2113,15 @@ out:
* Returns 1 when we need to wait a response, 0 when there is no sent
* request and -errno in error cases.
*/
static int coroutine_fn sd_co_rw_vector(void *p)
static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
{
SheepdogAIOCB *acb = p;
int ret = 0;
unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
unsigned long idx;
uint32_t object_size;
uint64_t oid;
uint64_t offset;
BDRVSheepdogState *s = acb->common.bs->opaque;
BDRVSheepdogState *s = acb->s;
SheepdogInode *inode = &s->inode;
AIOReq *aio_req;
@@ -2190,7 +2133,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
ret = sd_create_branch(s);
if (ret) {
acb->ret = -EIO;
goto out;
return;
}
}
@@ -2255,8 +2198,6 @@ static int coroutine_fn sd_co_rw_vector(void *p)
old_oid,
acb->aiocb_type == AIOCB_DISCARD_OBJ ?
0 : done);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
done:
@@ -2264,31 +2205,25 @@ static int coroutine_fn sd_co_rw_vector(void *p)
idx++;
done += len;
}
out:
if (!--acb->nr_pending) {
return acb->ret;
if (--acb->nr_pending) {
qemu_coroutine_yield();
}
return 1;
}
static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
static void sd_aio_complete(SheepdogAIOCB *acb)
{
SheepdogAIOCB *cb;
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
if (AIOCBOverlapping(aiocb, cb)) {
return true;
}
if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
return;
}
QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings);
return false;
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&acb->s->overlapping_queue);
}
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;
SheepdogAIOCB acb;
int ret;
int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
BDRVSheepdogState *s = bs->opaque;
@@ -2300,85 +2235,50 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
}
}
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
acb->aio_done_func = sd_write_done;
acb->aiocb_type = AIOCB_WRITE_UDATA;
sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA);
sd_co_rw_vector(&acb);
sd_write_done(&acb);
sd_aio_complete(&acb);
retry:
if (check_overlapping_aiocb(s, acb)) {
qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
return acb.ret;
}
static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;
int ret;
SheepdogAIOCB acb;
BDRVSheepdogState *s = bs->opaque;
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
acb->aiocb_type = AIOCB_READ_UDATA;
acb->aio_done_func = sd_finish_aiocb;
sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA);
sd_co_rw_vector(&acb);
sd_aio_complete(&acb);
retry:
if (check_overlapping_aiocb(s, acb)) {
qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
return acb.ret;
}
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogAIOCB *acb;
SheepdogAIOCB acb;
AIOReq *aio_req;
if (s->cache_flags != SD_FLAG_CMD_CACHE) {
return 0;
}
acb = sd_aio_setup(bs, NULL, 0, 0);
acb->aiocb_type = AIOCB_FLUSH_CACHE;
acb->aio_done_func = sd_finish_aiocb;
sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE);
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
acb.nr_pending++;
aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id),
0, 0, 0, false, 0, 0);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type);
qemu_coroutine_yield();
return acb->ret;
if (--acb.nr_pending) {
qemu_coroutine_yield();
}
sd_aio_complete(&acb);
return acb.ret;
}
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
@@ -2812,9 +2712,8 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
int count)
{
SheepdogAIOCB *acb;
SheepdogAIOCB acb;
BDRVSheepdogState *s = bs->opaque;
int ret;
QEMUIOVector discard_iov;
struct iovec iov;
uint32_t zero = 0;
@@ -2832,31 +2731,12 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) {
return -ENOTSUP;
}
acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
sd_co_rw_vector(&acb);
sd_aio_complete(&acb);
retry:
if (check_overlapping_aiocb(s, acb)) {
qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
return acb.ret;
}
static coroutine_fn int64_t

View File

@@ -889,10 +889,14 @@ static void restart_coroutine(void *opaque)
DPRINTF("co=%p", co);
qemu_coroutine_enter(co);
aio_co_wake(co);
}
static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -912,25 +916,10 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, rd_handler, wr_handler, NULL, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, NULL, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
set_fd_handler(s, bs);
qemu_coroutine_yield();
clear_fd_handler(s, bs);
DPRINTF("s->sock=%d - back", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
NULL, NULL, NULL, NULL);
}
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position

View File

@@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque)
StreamCompleteData *data = opaque;
BlockDriverState *bs = blk_bs(job->blk);
BlockDriverState *base = s->base;
Error *local_err = NULL;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
data->ret == 0) {
@@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque)
}
}
data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
bdrv_set_backing_hd(bs, base);
bdrv_set_backing_hd(bs, base, &local_err);
if (local_err) {
error_report_err(local_err);
data->ret = -EPERM;
goto out;
}
}
out:
/* Reopen the image back in read-only mode if necessary */
if (s->bs_flags != bdrv_get_flags(bs)) {
/* Give up write permissions before making it read-only */
blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
bdrv_reopen(bs, s->bs_flags, NULL);
}
@@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *iter;
int orig_bs_flags;
s = block_job_create(job_id, &stream_job_driver, bs, speed,
BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
/* Make sure that the image is opened in read-write mode */
orig_bs_flags = bdrv_get_flags(bs);
if (!(orig_bs_flags & BDRV_O_RDWR)) {
if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
block_job_unref(&s->common);
return;
}
}
/* Block all intermediate nodes between bs and base, because they
* will disappear from the chain after this operation */
/* Prevent concurrent jobs trying to modify the graph structure here, we
* already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached. */
s = block_job_create(job_id, &stream_job_driver, bs,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_GRAPH_MOD,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
goto fail;
}
/* Block all intermediate nodes between bs and base, because they will
* disappear from the chain after this operation. The streaming job reads
* every block only once, assuming that it doesn't change, so block writes
* and resizes. */
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
&error_abort);
}
s->base = base;
@@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
s->on_error = on_error;
trace_stream_start(bs, base, s);
block_job_start(&s->common);
return;
fail:
if (orig_bs_flags != bdrv_get_flags(bs)) {
bdrv_reopen(bs, s->bs_flags, NULL);
}
}

View File

@@ -326,7 +326,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
@@ -416,7 +416,9 @@ static void timer_cb(BlockBackend *blk, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
aio_context_acquire(blk_get_aio_context(blk));
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
aio_context_release(blk_get_aio_context(blk));
/* If the request queue was empty then we have to take care of
* scheduling the next one */

View File

@@ -35,8 +35,6 @@ mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_n
mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
# block/backup.c
backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"

View File

@@ -363,6 +363,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
logout("\n");
ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
@@ -757,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -885,6 +892,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = vdi_co_get_block_status,

View File

@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
bdrv_truncate(bs->file->bs, new_file_size);
bdrv_truncate(bs->file, new_file_size);
}
}
qemu_vfree(desc_entries);

View File

@@ -898,6 +898,12 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
uint64_t signature;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
s->bat = NULL;
s->first_visible_write = true;
@@ -1165,7 +1171,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
/* per the spec, the address for a block is in units of 1MB */
*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
return bdrv_truncate(bs->file, *new_offset + s->block_size);
}
/*
@@ -1853,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1977,6 +1984,7 @@ static BlockDriver bdrv_vhdx = {
.bdrv_open = vhdx_open,
.bdrv_close = vhdx_close,
.bdrv_reopen_prepare = vhdx_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
.bdrv_create = vhdx_create,

View File

@@ -943,6 +943,12 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t magic;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
buf = vmdk_read_desc(bs->file, 0, errp);
if (!buf) {
return -EINVAL;
@@ -1361,8 +1367,8 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
goto out;
}
data->lba = offset >> BDRV_SECTOR_BITS;
data->size = buf_len;
data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS);
data->size = cpu_to_le32(buf_len);
n_bytes = buf_len + sizeof(VmdkGrainMarker);
iov = (struct iovec) {
@@ -1697,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2065,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
}
new_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (new_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2353,6 +2361,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,

View File

@@ -220,6 +220,12 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -909,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1061,6 +1068,7 @@ static BlockDriver bdrv_vpc = {
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vpc_create,
.bdrv_co_preadv = vpc_co_preadv,

View File

@@ -2968,6 +2968,7 @@ static void write_target_close(BlockDriverState *bs) {
static BlockDriver vvfat_write_target = {
.format_name = "vvfat_write_target",
.instance_size = sizeof(void*),
.bdrv_co_pwritev = write_target_commit,
.bdrv_close = write_target_close,
};
@@ -3036,13 +3037,12 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
unlink(s->qcow_filename);
#endif
backing = bdrv_new();
bdrv_set_backing_hd(s->bs, backing);
bdrv_unref(backing);
backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR,
&error_abort);
*(void**) backing->opaque = s;
s->bs->backing->bs->drv = &vvfat_write_target;
s->bs->backing->bs->opaque = g_new(void *, 1);
*(void**)s->bs->backing->bs->opaque = s;
bdrv_set_backing_hd(s->bs, backing, &error_abort);
bdrv_unref(backing);
return 0;
@@ -3052,6 +3052,27 @@ err:
return ret;
}
static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
BDRVVVFATState *s = bs->opaque;
assert(c == s->qcow || role == &child_backing);
if (c == s->qcow) {
/* This is a private node, nobody should try to attach to it */
*nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
*nshared = BLK_PERM_WRITE_UNCHANGED;
} else {
/* The backing file is there so 'commit' can use it. vvfat doesn't
* access it in any way. */
*nperm = 0;
*nshared = BLK_PERM_ALL;
}
}
static void vvfat_close(BlockDriverState *bs)
{
BDRVVVFATState *s = bs->opaque;
@@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_file_open = vvfat_open,
.bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
.bdrv_child_perm = vvfat_child_perm,
.bdrv_co_preadv = vvfat_co_preadv,
.bdrv_co_pwritev = vvfat_co_pwritev,

View File

@@ -41,7 +41,7 @@ struct QEMUWin32AIOState {
HANDLE hIOCP;
EventNotifier e;
int count;
bool is_aio_context_attached;
AioContext *aio_ctx;
};
typedef struct QEMUWin32AIOCB {
@@ -87,7 +87,6 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
qemu_vfree(waiocb->buf);
}
waiocb->common.cb(waiocb->common.opaque, ret);
qemu_aio_unref(waiocb);
}
@@ -176,13 +175,13 @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
aio->is_aio_context_attached = false;
aio->aio_ctx = NULL;
}
void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
AioContext *new_context)
{
aio->is_aio_context_attached = true;
aio->aio_ctx = new_context;
aio_set_event_notifier(new_context, &aio->e, false,
win32_aio_completion_cb, NULL);
}
@@ -212,7 +211,7 @@ out_free_state:
void win32_aio_cleanup(QEMUWin32AIOState *aio)
{
assert(!aio->is_aio_context_attached);
assert(!aio->aio_ctx);
CloseHandle(aio->hIOCP);
event_notifier_cleanup(&aio->e);
g_free(aio);

View File

@@ -16,7 +16,6 @@
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
#include "qmp-commands.h"
#include "trace.h"
#include "block/nbd.h"
#include "io/channel-socket.h"

View File

@@ -48,10 +48,11 @@
#include "sysemu/sysemu.h"
#include "block/block_int.h"
#include "qmp-commands.h"
#include "trace.h"
#include "block/trace.h"
#include "sysemu/arch_init.h"
#include "qemu/cutils.h"
#include "qemu/help_option.h"
#include "qemu/throttle-options.h"
static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
@@ -227,27 +228,30 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
return NULL;
}
bool drive_check_orphaned(void)
void drive_check_orphaned(void)
{
BlockBackend *blk;
DriveInfo *dinfo;
bool rs = false;
Location loc;
bool orphans = false;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
/* If dinfo->bdrv->dev is NULL, it has no device attached. */
/* Unless this is a default drive, this may be an oversight. */
if (!blk_get_attached_dev(blk) && !dinfo->is_default &&
dinfo->type != IF_NONE) {
fprintf(stderr, "Warning: Orphaned drive without device: "
"id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
if_name[dinfo->type], dinfo->bus, dinfo->unit);
rs = true;
loc_push_none(&loc);
qemu_opts_loc_restore(dinfo->opts);
error_report("machine type does not support"
" if=%s,bus=%d,unit=%d",
if_name[dinfo->type], dinfo->bus, dinfo->unit);
loc_pop(&loc);
orphans = true;
}
}
return rs;
if (orphans) {
exit(1);
}
}
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
@@ -554,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
if ((!file || !*file) && !qdict_size(bs_opts)) {
BlockBackendRootState *blk_rs;
blk = blk_new();
blk = blk_new(0, BLK_PERM_ALL);
blk_rs = blk_get_root_state(blk);
blk_rs->open_flags = bdrv_flags;
blk_rs->read_only = read_only;
@@ -1764,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common,
if (!state->new_bs->drv->supports_backing) {
error_setg(errp, "The snapshot does not support backing images");
return;
}
/* This removes our old bs and adds the new bs. This is an operation that
* can fail, so we need to do it in .prepare; undoing it for abort is
* always possible. */
bdrv_ref(state->new_bs);
bdrv_append(state->new_bs, state->old_bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
@@ -1774,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common)
bdrv_set_aio_context(state->new_bs, state->aio_context);
/* This removes our old bs and adds the new bs */
bdrv_append(state->new_bs, state->old_bs);
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
@@ -1790,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common)
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
if (state->new_bs) {
bdrv_unref(state->new_bs);
if (state->new_bs->backing) {
bdrv_replace_in_backing_chain(state->new_bs, state->old_bs);
}
}
}
@@ -1801,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common)
if (state->aio_context) {
bdrv_drained_end(state->old_bs);
aio_context_release(state->aio_context);
bdrv_unref(state->new_bs);
}
}
@@ -2307,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
}
if (!locked || force) {
blk_dev_change_media_cb(blk, false);
blk_dev_change_media_cb(blk, false, &error_abort);
}
if (locked && !force) {
@@ -2345,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
Error **errp)
{
BlockBackend *blk;
Error *local_err = NULL;
device = has_device ? device : NULL;
id = has_id ? id : NULL;
@@ -2368,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
return;
}
blk_dev_change_media_cb(blk, true);
blk_dev_change_media_cb(blk, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
@@ -2421,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
* called at all); therefore, the medium needs to be ejected here.
* Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. false). */
blk_dev_change_media_cb(blk, false);
blk_dev_change_media_cb(blk, false, &error_abort);
}
out:
@@ -2431,7 +2452,9 @@ out:
static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
BlockDriverState *bs, Error **errp)
{
Error *local_err = NULL;
bool has_device;
int ret;
/* For BBs without a device, we can exchange the BDS tree at will */
has_device = blk_get_attached_dev(blk);
@@ -2451,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
return;
}
blk_insert_bs(blk, bs);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
return;
}
if (!blk_dev_has_tray(blk)) {
/* For tray-less devices, blockdev-close-tray is a no-op (or may not be
@@ -2459,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
* slot here.
* Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. true). */
blk_dev_change_media_cb(blk, true);
blk_dev_change_media_cb(blk, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
blk_remove_bs(blk);
return;
}
}
}
@@ -2855,6 +2886,7 @@ void qmp_block_resize(bool has_device, const char *device,
int64_t size, Error **errp)
{
Error *local_err = NULL;
BlockBackend *blk = NULL;
BlockDriverState *bs;
AioContext *aio_context;
int ret;
@@ -2885,10 +2917,16 @@ void qmp_block_resize(bool has_device, const char *device,
goto out;
}
blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
goto out;
}
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
ret = bdrv_truncate(bs, size);
ret = blk_truncate(blk, size);
switch (ret) {
case 0:
break;
@@ -2910,6 +2948,7 @@ void qmp_block_resize(bool has_device, const char *device,
}
out:
blk_unref(blk);
aio_context_release(aio_context);
}
@@ -3005,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_top, const char *top,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
bool has_filter_node_name, const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3020,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
if (!has_speed) {
speed = 0;
}
if (!has_filter_node_name) {
filter_node_name = NULL;
}
/* Important Note:
* libvirt relies on the DeviceNotFound error class in order to probe for
@@ -3094,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
goto out;
}
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
&local_err, false);
BLOCK_JOB_DEFAULT, speed, on_error,
filter_node_name, NULL, NULL, &local_err, false);
} else {
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3103,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
}
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
on_error, has_backing_file ? backing_file : NULL,
&local_err);
filter_node_name, &local_err);
}
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -3339,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_unmap, bool unmap,
bool has_filter_node_name,
const char *filter_node_name,
Error **errp)
{
@@ -3360,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_unmap) {
unmap = true;
}
if (!has_filter_node_name) {
filter_node_name = NULL;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3389,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
mirror_start(job_id, bs, target,
has_replaces ? replaces : NULL,
speed, granularity, buf_size, sync, backing_mode,
on_source_error, on_target_error, unmap, errp);
on_source_error, on_target_error, unmap, filter_node_name,
errp);
}
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3527,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_on_source_error, arg->on_source_error,
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
false, NULL,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@@ -3545,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_filter_node_name,
const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3576,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
true, true,
has_filter_node_name, filter_node_name,
&local_err);
error_propagate(errp, local_err);
@@ -3999,83 +4052,11 @@ QemuOptsList qemu_common_drive_opts = {
.name = BDRV_OPT_READ_ONLY,
.type = QEMU_OPT_BOOL,
.help = "open drive file as read-only",
},{
.name = "throttling.iops-total",
.type = QEMU_OPT_NUMBER,
.help = "limit total I/O operations per second",
},{
.name = "throttling.iops-read",
.type = QEMU_OPT_NUMBER,
.help = "limit read operations per second",
},{
.name = "throttling.iops-write",
.type = QEMU_OPT_NUMBER,
.help = "limit write operations per second",
},{
.name = "throttling.bps-total",
.type = QEMU_OPT_NUMBER,
.help = "limit total bytes per second",
},{
.name = "throttling.bps-read",
.type = QEMU_OPT_NUMBER,
.help = "limit read bytes per second",
},{
.name = "throttling.bps-write",
.type = QEMU_OPT_NUMBER,
.help = "limit write bytes per second",
},{
.name = "throttling.iops-total-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations burst",
},{
.name = "throttling.iops-read-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations read burst",
},{
.name = "throttling.iops-write-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations write burst",
},{
.name = "throttling.bps-total-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes burst",
},{
.name = "throttling.bps-read-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes read burst",
},{
.name = "throttling.bps-write-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes write burst",
},{
.name = "throttling.iops-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-total-max burst period, in seconds",
},{
.name = "throttling.iops-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-read-max burst period, in seconds",
},{
.name = "throttling.iops-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-write-max burst period, in seconds",
},{
.name = "throttling.bps-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-total-max burst period, in seconds",
},{
.name = "throttling.bps-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-read-max burst period, in seconds",
},{
.name = "throttling.bps-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-write-max burst period, in seconds",
},{
.name = "throttling.iops-size",
.type = QEMU_OPT_NUMBER,
.help = "when limiting by iops max size of an I/O in bytes",
},{
},
THROTTLE_OPTS,
{
.name = "throttling.group",
.type = QEMU_OPT_STRING,
.help = "name of the block throttling group",

View File

@@ -25,7 +25,6 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
@@ -56,6 +55,19 @@ struct BlockJobTxn {
static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
static char *child_job_get_parent_desc(BdrvChild *c)
{
BlockJob *job = c->opaque;
return g_strdup_printf("%s job '%s'",
BlockJobType_lookup[job->driver->job_type],
job->id);
}
static const BdrvChildRole child_job = {
.get_parent_desc = child_job_get_parent_desc,
.stay_at_node = true,
};
BlockJob *block_job_next(BlockJob *job)
{
if (!job) {
@@ -116,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque)
block_job_unref(job);
}
void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
void block_job_remove_all_bdrv(BlockJob *job)
{
job->nodes = g_slist_prepend(job->nodes, bs);
GSList *l;
for (l = job->nodes; l; l = l->next) {
BdrvChild *c = l->data;
bdrv_op_unblock_all(c->bs, job->blocker);
bdrv_root_unref_child(c);
}
g_slist_free(job->nodes);
job->nodes = NULL;
}
int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
uint64_t perm, uint64_t shared_perm, Error **errp)
{
BdrvChild *c;
c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
job, errp);
if (c == NULL) {
return -EPERM;
}
job->nodes = g_slist_prepend(job->nodes, c);
bdrv_ref(bs);
bdrv_op_block_all(bs, job->blocker);
return 0;
}
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
BlockDriverState *bs, int64_t speed, int flags,
BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
BlockBackend *blk;
BlockJob *job;
int ret;
if (bs->job) {
error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
@@ -160,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
}
}
blk = blk_new();
blk_insert_bs(blk, bs);
blk = blk_new(perm, shared_perm);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
blk_unref(blk);
return NULL;
}
job = g_malloc0(driver->instance_size);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
block_job_add_bdrv(job, bs);
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
@@ -229,15 +270,9 @@ void block_job_ref(BlockJob *job)
void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
GSList *l;
BlockDriverState *bs = blk_bs(job->blk);
bs->job = NULL;
for (l = job->nodes; l; l = l->next) {
bs = l->data;
bdrv_op_unblock_all(bs, job->blocker);
bdrv_unref(bs);
}
g_slist_free(job->nodes);
block_job_remove_all_bdrv(job);
blk_remove_aio_context_notifier(job->blk,
block_job_attached_aio_context,
block_job_detach_aio_context, job);

17
chardev/Makefile.objs Normal file
View File

@@ -0,0 +1,17 @@
chardev-obj-y += char.o
chardev-obj-$(CONFIG_WIN32) += char-console.o
chardev-obj-$(CONFIG_POSIX) += char-fd.o
chardev-obj-y += char-file.o
chardev-obj-y += char-io.o
chardev-obj-y += char-mux.o
chardev-obj-y += char-null.o
chardev-obj-$(CONFIG_POSIX) += char-parallel.o
chardev-obj-y += char-pipe.o
chardev-obj-$(CONFIG_POSIX) += char-pty.o
chardev-obj-y += char-ringbuf.o
chardev-obj-y += char-serial.o
chardev-obj-y += char-socket.o
chardev-obj-y += char-stdio.o
chardev-obj-y += char-udp.o
chardev-obj-$(CONFIG_WIN32) += char-win.o
chardev-obj-$(CONFIG_WIN32) += char-win-stdio.o

53
chardev/char-console.c Normal file
View File

@@ -0,0 +1,53 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "char-win.h"
static void qemu_chr_open_win_con(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
qemu_chr_open_win_file(chr, GetStdHandle(STD_OUTPUT_HANDLE));
}
static void char_console_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->open = qemu_chr_open_win_con;
}
static const TypeInfo char_console_type_info = {
.name = TYPE_CHARDEV_CONSOLE,
.parent = TYPE_CHARDEV_WIN,
.class_init = char_console_class_init,
};
static void register_types(void)
{
type_register_static(&char_console_type_info);
}
type_init(register_types);

170
chardev/char-fd.c Normal file
View File

@@ -0,0 +1,170 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/sockets.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "io/channel-file.h"
#include "char-fd.h"
#include "char-io.h"
/* Called with chr_write_lock held. */
static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)
{
FDChardev *s = FD_CHARDEV(chr);
return io_channel_send(s->ioc_out, buf, len);
}
static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
{
Chardev *chr = CHARDEV(opaque);
FDChardev *s = FD_CHARDEV(opaque);
int len;
uint8_t buf[CHR_READ_BUF_LEN];
ssize_t ret;
len = sizeof(buf);
if (len > s->max_size) {
len = s->max_size;
}
if (len == 0) {
return TRUE;
}
ret = qio_channel_read(
chan, (gchar *)buf, len, NULL);
if (ret == 0) {
remove_fd_in_watch(chr);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
return FALSE;
}
if (ret > 0) {
qemu_chr_be_write(chr, buf, ret);
}
return TRUE;
}
static int fd_chr_read_poll(void *opaque)
{
Chardev *chr = CHARDEV(opaque);
FDChardev *s = FD_CHARDEV(opaque);
s->max_size = qemu_chr_be_can_write(chr);
return s->max_size;
}
static GSource *fd_chr_add_watch(Chardev *chr, GIOCondition cond)
{
FDChardev *s = FD_CHARDEV(chr);
return qio_channel_create_watch(s->ioc_out, cond);
}
static void fd_chr_update_read_handler(Chardev *chr,
GMainContext *context)
{
FDChardev *s = FD_CHARDEV(chr);
remove_fd_in_watch(chr);
if (s->ioc_in) {
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc_in,
fd_chr_read_poll,
fd_chr_read, chr,
context);
}
}
static void char_fd_finalize(Object *obj)
{
Chardev *chr = CHARDEV(obj);
FDChardev *s = FD_CHARDEV(obj);
remove_fd_in_watch(chr);
if (s->ioc_in) {
object_unref(OBJECT(s->ioc_in));
}
if (s->ioc_out) {
object_unref(OBJECT(s->ioc_out));
}
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
int qmp_chardev_open_file_source(char *src, int flags, Error **errp)
{
int fd = -1;
TFR(fd = qemu_open(src, flags, 0666));
if (fd == -1) {
error_setg_file_open(errp, errno, src);
}
return fd;
}
/* open a character device to a unix fd */
void qemu_chr_open_fd(Chardev *chr,
int fd_in, int fd_out)
{
FDChardev *s = FD_CHARDEV(chr);
char *name;
s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in));
name = g_strdup_printf("chardev-file-in-%s", chr->label);
qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name);
g_free(name);
s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out));
name = g_strdup_printf("chardev-file-out-%s", chr->label);
qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name);
g_free(name);
qemu_set_nonblock(fd_out);
s->chr = chr;
}
static void char_fd_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->chr_add_watch = fd_chr_add_watch;
cc->chr_write = fd_chr_write;
cc->chr_update_read_handler = fd_chr_update_read_handler;
}
static const TypeInfo char_fd_type_info = {
.name = TYPE_CHARDEV_FD,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(FDChardev),
.instance_finalize = char_fd_finalize,
.class_init = char_fd_class_init,
.abstract = true,
};
static void register_types(void)
{
type_register_static(&char_fd_type_info);
}
type_init(register_types);

44
chardev/char-fd.h Normal file
View File

@@ -0,0 +1,44 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CHAR_FD_H
#define CHAR_FD_H
#include "io/channel.h"
#include "sysemu/char.h"
typedef struct FDChardev {
Chardev parent;
Chardev *chr;
QIOChannel *ioc_in, *ioc_out;
int max_size;
} FDChardev;
#define TYPE_CHARDEV_FD "chardev-fd"
#define FD_CHARDEV(obj) OBJECT_CHECK(FDChardev, (obj), TYPE_CHARDEV_FD)
void qemu_chr_open_fd(Chardev *chr, int fd_in, int fd_out);
int qmp_chardev_open_file_source(char *src, int flags, Error **errp);
#endif /* CHAR_FD_H */

139
chardev/char-file.c Normal file
View File

@@ -0,0 +1,139 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#ifdef _WIN32
#include "char-win.h"
#else
#include "char-fd.h"
#endif
static void qmp_chardev_open_file(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevFile *file = backend->u.file.data;
#ifdef _WIN32
HANDLE out;
DWORD accessmode;
DWORD flags;
if (file->has_in) {
error_setg(errp, "input file not supported");
return;
}
if (file->has_append && file->append) {
/* Append to file if it already exists. */
accessmode = FILE_GENERIC_WRITE & ~FILE_WRITE_DATA;
flags = OPEN_ALWAYS;
} else {
/* Truncate file if it already exists. */
accessmode = GENERIC_WRITE;
flags = CREATE_ALWAYS;
}
out = CreateFile(file->out, accessmode, FILE_SHARE_READ, NULL, flags,
FILE_ATTRIBUTE_NORMAL, NULL);
if (out == INVALID_HANDLE_VALUE) {
error_setg(errp, "open %s failed", file->out);
return;
}
qemu_chr_open_win_file(chr, out);
#else
int flags, in = -1, out;
flags = O_WRONLY | O_CREAT | O_BINARY;
if (file->has_append && file->append) {
flags |= O_APPEND;
} else {
flags |= O_TRUNC;
}
out = qmp_chardev_open_file_source(file->out, flags, errp);
if (out < 0) {
return;
}
if (file->has_in) {
flags = O_RDONLY;
in = qmp_chardev_open_file_source(file->in, flags, errp);
if (in < 0) {
qemu_close(out);
return;
}
}
qemu_chr_open_fd(chr, in, out);
#endif
}
static void qemu_chr_parse_file_out(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
const char *path = qemu_opt_get(opts, "path");
ChardevFile *file;
backend->type = CHARDEV_BACKEND_KIND_FILE;
if (path == NULL) {
error_setg(errp, "chardev: file: no filename given");
return;
}
file = backend->u.file.data = g_new0(ChardevFile, 1);
qemu_chr_parse_common(opts, qapi_ChardevFile_base(file));
file->out = g_strdup(path);
file->has_append = true;
file->append = qemu_opt_get_bool(opts, "append", false);
}
static void char_file_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_file_out;
cc->open = qmp_chardev_open_file;
}
static const TypeInfo char_file_type_info = {
.name = TYPE_CHARDEV_FILE,
#ifdef _WIN32
.parent = TYPE_CHARDEV_WIN,
#else
.parent = TYPE_CHARDEV_FD,
#endif
.class_init = char_file_class_init,
};
static void register_types(void)
{
type_register_static(&char_file_type_info);
}
type_init(register_types);

192
chardev/char-io.c Normal file
View File

@@ -0,0 +1,192 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "char-io.h"
typedef struct IOWatchPoll {
GSource parent;
QIOChannel *ioc;
GSource *src;
IOCanReadHandler *fd_can_read;
GSourceFunc fd_read;
void *opaque;
GMainContext *context;
} IOWatchPoll;
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
{
return container_of(source, IOWatchPoll, parent);
}
static gboolean io_watch_poll_prepare(GSource *source,
gint *timeout)
{
IOWatchPoll *iwp = io_watch_poll_from_source(source);
bool now_active = iwp->fd_can_read(iwp->opaque) > 0;
bool was_active = iwp->src != NULL;
if (was_active == now_active) {
return FALSE;
}
if (now_active) {
iwp->src = qio_channel_create_watch(
iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
g_source_attach(iwp->src, iwp->context);
} else {
g_source_destroy(iwp->src);
g_source_unref(iwp->src);
iwp->src = NULL;
}
return FALSE;
}
static gboolean io_watch_poll_check(GSource *source)
{
return FALSE;
}
static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
gpointer user_data)
{
abort();
}
static void io_watch_poll_finalize(GSource *source)
{
/* Due to a glib bug, removing the last reference to a source
* inside a finalize callback causes recursive locking (and a
* deadlock). This is not a problem inside other callbacks,
* including dispatch callbacks, so we call io_remove_watch_poll
* to remove this source. At this point, iwp->src must
* be NULL, or we would leak it.
*
* This would be solved much more elegantly by child sources,
* but we support older glib versions that do not have them.
*/
IOWatchPoll *iwp = io_watch_poll_from_source(source);
assert(iwp->src == NULL);
}
static GSourceFuncs io_watch_poll_funcs = {
.prepare = io_watch_poll_prepare,
.check = io_watch_poll_check,
.dispatch = io_watch_poll_dispatch,
.finalize = io_watch_poll_finalize,
};
guint io_add_watch_poll(Chardev *chr,
QIOChannel *ioc,
IOCanReadHandler *fd_can_read,
QIOChannelFunc fd_read,
gpointer user_data,
GMainContext *context)
{
IOWatchPoll *iwp;
int tag;
char *name;
iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs,
sizeof(IOWatchPoll));
iwp->fd_can_read = fd_can_read;
iwp->opaque = user_data;
iwp->ioc = ioc;
iwp->fd_read = (GSourceFunc) fd_read;
iwp->src = NULL;
iwp->context = context;
name = g_strdup_printf("chardev-iowatch-%s", chr->label);
g_source_set_name((GSource *)iwp, name);
g_free(name);
tag = g_source_attach(&iwp->parent, context);
g_source_unref(&iwp->parent);
return tag;
}
static void io_remove_watch_poll(guint tag)
{
GSource *source;
IOWatchPoll *iwp;
g_return_if_fail(tag > 0);
source = g_main_context_find_source_by_id(NULL, tag);
g_return_if_fail(source != NULL);
iwp = io_watch_poll_from_source(source);
if (iwp->src) {
g_source_destroy(iwp->src);
g_source_unref(iwp->src);
iwp->src = NULL;
}
g_source_destroy(&iwp->parent);
}
void remove_fd_in_watch(Chardev *chr)
{
if (chr->fd_in_tag) {
io_remove_watch_poll(chr->fd_in_tag);
chr->fd_in_tag = 0;
}
}
int io_channel_send_full(QIOChannel *ioc,
const void *buf, size_t len,
int *fds, size_t nfds)
{
size_t offset = 0;
while (offset < len) {
ssize_t ret = 0;
struct iovec iov = { .iov_base = (char *)buf + offset,
.iov_len = len - offset };
ret = qio_channel_writev_full(
ioc, &iov, 1,
fds, nfds, NULL);
if (ret == QIO_CHANNEL_ERR_BLOCK) {
if (offset) {
return offset;
}
errno = EAGAIN;
return -1;
} else if (ret < 0) {
errno = EINVAL;
return -1;
}
offset += ret;
}
return offset;
}
int io_channel_send(QIOChannel *ioc, const void *buf, size_t len)
{
return io_channel_send_full(ioc, buf, len, NULL, 0);
}

46
chardev/char-io.h Normal file
View File

@@ -0,0 +1,46 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CHAR_IO_H
#define CHAR_IO_H
#include "qemu-common.h"
#include "io/channel.h"
#include "sysemu/char.h"
/* Can only be used for read */
guint io_add_watch_poll(Chardev *chr,
QIOChannel *ioc,
IOCanReadHandler *fd_can_read,
QIOChannelFunc fd_read,
gpointer user_data,
GMainContext *context);
void remove_fd_in_watch(Chardev *chr);
int io_channel_send(QIOChannel *ioc, const void *buf, size_t len);
int io_channel_send_full(QIOChannel *ioc, const void *buf, size_t len,
int *fds, size_t nfds);
#endif /* CHAR_IO_H */

358
chardev/char-mux.c Normal file
View File

@@ -0,0 +1,358 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "sysemu/block-backend.h"
#include "char-mux.h"
/* MUX driver for serial I/O splitting */
/* Called with chr_write_lock held. */
static int mux_chr_write(Chardev *chr, const uint8_t *buf, int len)
{
MuxChardev *d = MUX_CHARDEV(chr);
int ret;
if (!d->timestamps) {
ret = qemu_chr_fe_write(&d->chr, buf, len);
} else {
int i;
ret = 0;
for (i = 0; i < len; i++) {
if (d->linestart) {
char buf1[64];
int64_t ti;
int secs;
ti = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
if (d->timestamps_start == -1) {
d->timestamps_start = ti;
}
ti -= d->timestamps_start;
secs = ti / 1000;
snprintf(buf1, sizeof(buf1),
"[%02d:%02d:%02d.%03d] ",
secs / 3600,
(secs / 60) % 60,
secs % 60,
(int)(ti % 1000));
/* XXX this blocks entire thread. Rewrite to use
* qemu_chr_fe_write and background I/O callbacks */
qemu_chr_fe_write_all(&d->chr,
(uint8_t *)buf1, strlen(buf1));
d->linestart = 0;
}
ret += qemu_chr_fe_write(&d->chr, buf + i, 1);
if (buf[i] == '\n') {
d->linestart = 1;
}
}
}
return ret;
}
static const char * const mux_help[] = {
"% h print this help\n\r",
"% x exit emulator\n\r",
"% s save disk data back to file (if -snapshot)\n\r",
"% t toggle console timestamps\n\r",
"% b send break (magic sysrq)\n\r",
"% c switch between console and monitor\n\r",
"% % sends %\n\r",
NULL
};
int term_escape_char = 0x01; /* ctrl-a is used for escape */
static void mux_print_help(Chardev *chr)
{
int i, j;
char ebuf[15] = "Escape-Char";
char cbuf[50] = "\n\r";
if (term_escape_char > 0 && term_escape_char < 26) {
snprintf(cbuf, sizeof(cbuf), "\n\r");
snprintf(ebuf, sizeof(ebuf), "C-%c", term_escape_char - 1 + 'a');
} else {
snprintf(cbuf, sizeof(cbuf),
"\n\rEscape-Char set to Ascii: 0x%02x\n\r\n\r",
term_escape_char);
}
/* XXX this blocks entire thread. Rewrite to use
* qemu_chr_fe_write and background I/O callbacks */
qemu_chr_write_all(chr, (uint8_t *)cbuf, strlen(cbuf));
for (i = 0; mux_help[i] != NULL; i++) {
for (j = 0; mux_help[i][j] != '\0'; j++) {
if (mux_help[i][j] == '%') {
qemu_chr_write_all(chr, (uint8_t *)ebuf, strlen(ebuf));
} else {
qemu_chr_write_all(chr, (uint8_t *)&mux_help[i][j], 1);
}
}
}
}
void mux_chr_send_event(MuxChardev *d, int mux_nr, int event)
{
CharBackend *be = d->backends[mux_nr];
if (be && be->chr_event) {
be->chr_event(be->opaque, event);
}
}
static int mux_proc_byte(Chardev *chr, MuxChardev *d, int ch)
{
if (d->term_got_escape) {
d->term_got_escape = 0;
if (ch == term_escape_char) {
goto send_char;
}
switch (ch) {
case '?':
case 'h':
mux_print_help(chr);
break;
case 'x':
{
const char *term = "QEMU: Terminated\n\r";
qemu_chr_write_all(chr, (uint8_t *)term, strlen(term));
exit(0);
break;
}
case 's':
blk_commit_all();
break;
case 'b':
qemu_chr_be_event(chr, CHR_EVENT_BREAK);
break;
case 'c':
assert(d->mux_cnt > 0); /* handler registered with first fe */
/* Switch to the next registered device */
mux_set_focus(chr, (d->focus + 1) % d->mux_cnt);
break;
case 't':
d->timestamps = !d->timestamps;
d->timestamps_start = -1;
d->linestart = 0;
break;
}
} else if (ch == term_escape_char) {
d->term_got_escape = 1;
} else {
send_char:
return 1;
}
return 0;
}
static void mux_chr_accept_input(Chardev *chr)
{
MuxChardev *d = MUX_CHARDEV(chr);
int m = d->focus;
CharBackend *be = d->backends[m];
while (be && d->prod[m] != d->cons[m] &&
be->chr_can_read && be->chr_can_read(be->opaque)) {
be->chr_read(be->opaque,
&d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1);
}
}
static int mux_chr_can_read(void *opaque)
{
MuxChardev *d = MUX_CHARDEV(opaque);
int m = d->focus;
CharBackend *be = d->backends[m];
if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE) {
return 1;
}
if (be && be->chr_can_read) {
return be->chr_can_read(be->opaque);
}
return 0;
}
static void mux_chr_read(void *opaque, const uint8_t *buf, int size)
{
Chardev *chr = CHARDEV(opaque);
MuxChardev *d = MUX_CHARDEV(opaque);
int m = d->focus;
CharBackend *be = d->backends[m];
int i;
mux_chr_accept_input(opaque);
for (i = 0; i < size; i++)
if (mux_proc_byte(chr, d, buf[i])) {
if (d->prod[m] == d->cons[m] &&
be && be->chr_can_read &&
be->chr_can_read(be->opaque)) {
be->chr_read(be->opaque, &buf[i], 1);
} else {
d->buffer[m][d->prod[m]++ & MUX_BUFFER_MASK] = buf[i];
}
}
}
bool muxes_realized;
static void mux_chr_event(void *opaque, int event)
{
MuxChardev *d = MUX_CHARDEV(opaque);
int i;
if (!muxes_realized) {
return;
}
/* Send the event to all registered listeners */
for (i = 0; i < d->mux_cnt; i++) {
mux_chr_send_event(d, i, event);
}
}
static GSource *mux_chr_add_watch(Chardev *s, GIOCondition cond)
{
MuxChardev *d = MUX_CHARDEV(s);
Chardev *chr = qemu_chr_fe_get_driver(&d->chr);
ChardevClass *cc = CHARDEV_GET_CLASS(chr);
if (!cc->chr_add_watch) {
return NULL;
}
return cc->chr_add_watch(chr, cond);
}
static void char_mux_finalize(Object *obj)
{
MuxChardev *d = MUX_CHARDEV(obj);
int i;
for (i = 0; i < d->mux_cnt; i++) {
CharBackend *be = d->backends[i];
if (be) {
be->chr = NULL;
}
}
qemu_chr_fe_deinit(&d->chr);
}
void mux_chr_set_handlers(Chardev *chr, GMainContext *context)
{
MuxChardev *d = MUX_CHARDEV(chr);
/* Fix up the real driver with mux routines */
qemu_chr_fe_set_handlers(&d->chr,
mux_chr_can_read,
mux_chr_read,
mux_chr_event,
chr,
context, true);
}
void mux_set_focus(Chardev *chr, int focus)
{
MuxChardev *d = MUX_CHARDEV(chr);
assert(focus >= 0);
assert(focus < d->mux_cnt);
if (d->focus != -1) {
mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
}
d->focus = focus;
mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN);
}
static void qemu_chr_open_mux(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevMux *mux = backend->u.mux.data;
Chardev *drv;
MuxChardev *d = MUX_CHARDEV(chr);
drv = qemu_chr_find(mux->chardev);
if (drv == NULL) {
error_setg(errp, "mux: base chardev %s not found", mux->chardev);
return;
}
d->focus = -1;
/* only default to opened state if we've realized the initial
* set of muxes
*/
*be_opened = muxes_realized;
qemu_chr_fe_init(&d->chr, drv, errp);
}
static void qemu_chr_parse_mux(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
const char *chardev = qemu_opt_get(opts, "chardev");
ChardevMux *mux;
if (chardev == NULL) {
error_setg(errp, "chardev: mux: no chardev given");
return;
}
backend->type = CHARDEV_BACKEND_KIND_MUX;
mux = backend->u.mux.data = g_new0(ChardevMux, 1);
qemu_chr_parse_common(opts, qapi_ChardevMux_base(mux));
mux->chardev = g_strdup(chardev);
}
static void char_mux_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_mux;
cc->open = qemu_chr_open_mux;
cc->chr_write = mux_chr_write;
cc->chr_accept_input = mux_chr_accept_input;
cc->chr_add_watch = mux_chr_add_watch;
}
static const TypeInfo char_mux_type_info = {
.name = TYPE_CHARDEV_MUX,
.parent = TYPE_CHARDEV,
.class_init = char_mux_class_init,
.instance_size = sizeof(MuxChardev),
.instance_finalize = char_mux_finalize,
};
static void register_types(void)
{
type_register_static(&char_mux_type_info);
}
type_init(register_types);

63
chardev/char-mux.h Normal file
View File

@@ -0,0 +1,63 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CHAR_MUX_H
#define CHAR_MUX_H
#include "sysemu/char.h"
extern bool muxes_realized;
#define MAX_MUX 4
#define MUX_BUFFER_SIZE 32 /* Must be a power of 2. */
#define MUX_BUFFER_MASK (MUX_BUFFER_SIZE - 1)
typedef struct MuxChardev {
Chardev parent;
CharBackend *backends[MAX_MUX];
CharBackend chr;
int focus;
int mux_cnt;
int term_got_escape;
int max_size;
/* Intermediate input buffer catches escape sequences even if the
currently active device is not accepting any input - but only until it
is full as well. */
unsigned char buffer[MAX_MUX][MUX_BUFFER_SIZE];
int prod[MAX_MUX];
int cons[MAX_MUX];
int timestamps;
/* Protected by the Chardev chr_write_lock. */
int linestart;
int64_t timestamps_start;
} MuxChardev;
#define MUX_CHARDEV(obj) OBJECT_CHECK(MuxChardev, (obj), TYPE_CHARDEV_MUX)
#define CHARDEV_IS_MUX(chr) \
object_dynamic_cast(OBJECT(chr), TYPE_CHARDEV_MUX)
void mux_chr_set_handlers(Chardev *chr, GMainContext *context);
void mux_set_focus(Chardev *chr, int focus);
void mux_chr_send_event(MuxChardev *d, int mux_nr, int event);
#endif /* CHAR_MUX_H */

54
chardev/char-null.c Normal file
View File

@@ -0,0 +1,54 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "sysemu/char.h"
static void null_chr_open(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
*be_opened = false;
}
static void char_null_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->open = null_chr_open;
}
static const TypeInfo char_null_type_info = {
.name = TYPE_CHARDEV_NULL,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(Chardev),
.class_init = char_null_class_init,
};
static void register_types(void)
{
type_register_static(&char_null_type_info);
}
type_init(register_types);

316
chardev/char-parallel.c Normal file
View File

@@ -0,0 +1,316 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "sysemu/char.h"
#include "qapi/error.h"
#include <sys/ioctl.h>
#ifdef CONFIG_BSD
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <dev/ppbus/ppi.h>
#include <dev/ppbus/ppbconf.h>
#elif defined(__DragonFly__)
#include <dev/misc/ppi/ppi.h>
#include <bus/ppbus/ppbconf.h>
#endif
#else
#ifdef __linux__
#include <linux/ppdev.h>
#include <linux/parport.h>
#endif
#endif
#include "char-fd.h"
#include "char-parallel.h"
#if defined(__linux__)
typedef struct {
Chardev parent;
int fd;
int mode;
} ParallelChardev;
#define PARALLEL_CHARDEV(obj) \
OBJECT_CHECK(ParallelChardev, (obj), TYPE_CHARDEV_PARALLEL)
static int pp_hw_mode(ParallelChardev *s, uint16_t mode)
{
if (s->mode != mode) {
int m = mode;
if (ioctl(s->fd, PPSETMODE, &m) < 0) {
return 0;
}
s->mode = mode;
}
return 1;
}
static int pp_ioctl(Chardev *chr, int cmd, void *arg)
{
ParallelChardev *drv = PARALLEL_CHARDEV(chr);
int fd = drv->fd;
uint8_t b;
switch (cmd) {
case CHR_IOCTL_PP_READ_DATA:
if (ioctl(fd, PPRDATA, &b) < 0) {
return -ENOTSUP;
}
*(uint8_t *)arg = b;
break;
case CHR_IOCTL_PP_WRITE_DATA:
b = *(uint8_t *)arg;
if (ioctl(fd, PPWDATA, &b) < 0) {
return -ENOTSUP;
}
break;
case CHR_IOCTL_PP_READ_CONTROL:
if (ioctl(fd, PPRCONTROL, &b) < 0) {
return -ENOTSUP;
}
/* Linux gives only the lowest bits, and no way to know data
direction! For better compatibility set the fixed upper
bits. */
*(uint8_t *)arg = b | 0xc0;
break;
case CHR_IOCTL_PP_WRITE_CONTROL:
b = *(uint8_t *)arg;
if (ioctl(fd, PPWCONTROL, &b) < 0) {
return -ENOTSUP;
}
break;
case CHR_IOCTL_PP_READ_STATUS:
if (ioctl(fd, PPRSTATUS, &b) < 0) {
return -ENOTSUP;
}
*(uint8_t *)arg = b;
break;
case CHR_IOCTL_PP_DATA_DIR:
if (ioctl(fd, PPDATADIR, (int *)arg) < 0) {
return -ENOTSUP;
}
break;
case CHR_IOCTL_PP_EPP_READ_ADDR:
if (pp_hw_mode(drv, IEEE1284_MODE_EPP | IEEE1284_ADDR)) {
struct ParallelIOArg *parg = arg;
int n = read(fd, parg->buffer, parg->count);
if (n != parg->count) {
return -EIO;
}
}
break;
case CHR_IOCTL_PP_EPP_READ:
if (pp_hw_mode(drv, IEEE1284_MODE_EPP)) {
struct ParallelIOArg *parg = arg;
int n = read(fd, parg->buffer, parg->count);
if (n != parg->count) {
return -EIO;
}
}
break;
case CHR_IOCTL_PP_EPP_WRITE_ADDR:
if (pp_hw_mode(drv, IEEE1284_MODE_EPP | IEEE1284_ADDR)) {
struct ParallelIOArg *parg = arg;
int n = write(fd, parg->buffer, parg->count);
if (n != parg->count) {
return -EIO;
}
}
break;
case CHR_IOCTL_PP_EPP_WRITE:
if (pp_hw_mode(drv, IEEE1284_MODE_EPP)) {
struct ParallelIOArg *parg = arg;
int n = write(fd, parg->buffer, parg->count);
if (n != parg->count) {
return -EIO;
}
}
break;
default:
return -ENOTSUP;
}
return 0;
}
static void qemu_chr_open_pp_fd(Chardev *chr,
int fd,
bool *be_opened,
Error **errp)
{
ParallelChardev *drv = PARALLEL_CHARDEV(chr);
if (ioctl(fd, PPCLAIM) < 0) {
error_setg_errno(errp, errno, "not a parallel port");
close(fd);
return;
}
drv->fd = fd;
drv->mode = IEEE1284_MODE_COMPAT;
}
#endif /* __linux__ */
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
typedef struct {
Chardev parent;
int fd;
} ParallelChardev;
#define PARALLEL_CHARDEV(obj) \
OBJECT_CHECK(ParallelChardev, (obj), TYPE_CHARDEV_PARALLEL)
static int pp_ioctl(Chardev *chr, int cmd, void *arg)
{
ParallelChardev *drv = PARALLEL_CHARDEV(chr);
uint8_t b;
switch (cmd) {
case CHR_IOCTL_PP_READ_DATA:
if (ioctl(drv->fd, PPIGDATA, &b) < 0) {
return -ENOTSUP;
}
*(uint8_t *)arg = b;
break;
case CHR_IOCTL_PP_WRITE_DATA:
b = *(uint8_t *)arg;
if (ioctl(drv->fd, PPISDATA, &b) < 0) {
return -ENOTSUP;
}
break;
case CHR_IOCTL_PP_READ_CONTROL:
if (ioctl(drv->fd, PPIGCTRL, &b) < 0) {
return -ENOTSUP;
}
*(uint8_t *)arg = b;
break;
case CHR_IOCTL_PP_WRITE_CONTROL:
b = *(uint8_t *)arg;
if (ioctl(drv->fd, PPISCTRL, &b) < 0) {
return -ENOTSUP;
}
break;
case CHR_IOCTL_PP_READ_STATUS:
if (ioctl(drv->fd, PPIGSTATUS, &b) < 0) {
return -ENOTSUP;
}
*(uint8_t *)arg = b;
break;
default:
return -ENOTSUP;
}
return 0;
}
static void qemu_chr_open_pp_fd(Chardev *chr,
int fd,
bool *be_opened,
Error **errp)
{
ParallelChardev *drv = PARALLEL_CHARDEV(chr);
drv->fd = fd;
*be_opened = false;
}
#endif
#ifdef HAVE_CHARDEV_PARPORT
static void qmp_chardev_open_parallel(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevHostdev *parallel = backend->u.parallel.data;
int fd;
fd = qmp_chardev_open_file_source(parallel->device, O_RDWR, errp);
if (fd < 0) {
return;
}
qemu_chr_open_pp_fd(chr, fd, be_opened, errp);
}
static void qemu_chr_parse_parallel(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
const char *device = qemu_opt_get(opts, "path");
ChardevHostdev *parallel;
if (device == NULL) {
error_setg(errp, "chardev: parallel: no device path given");
return;
}
backend->type = CHARDEV_BACKEND_KIND_PARALLEL;
parallel = backend->u.parallel.data = g_new0(ChardevHostdev, 1);
qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(parallel));
parallel->device = g_strdup(device);
}
static void char_parallel_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_parallel;
cc->open = qmp_chardev_open_parallel;
#if defined(__linux__)
cc->chr_ioctl = pp_ioctl;
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
defined(__DragonFly__)
cc->chr_ioctl = pp_ioctl;
#endif
}
static void char_parallel_finalize(Object *obj)
{
#if defined(__linux__)
Chardev *chr = CHARDEV(obj);
ParallelChardev *drv = PARALLEL_CHARDEV(chr);
int fd = drv->fd;
pp_hw_mode(drv, IEEE1284_MODE_COMPAT);
ioctl(fd, PPRELEASE);
close(fd);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
defined(__DragonFly__)
/* FIXME: close fd? */
#endif
}
static const TypeInfo char_parallel_type_info = {
.name = TYPE_CHARDEV_PARALLEL,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(ParallelChardev),
.instance_finalize = char_parallel_finalize,
.class_init = char_parallel_class_init,
};
static void register_types(void)
{
type_register_static(&char_parallel_type_info);
}
type_init(register_types);
#endif

32
chardev/char-parallel.h Normal file
View File

@@ -0,0 +1,32 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CHAR_PARALLEL_H
#define CHAR_PARALLEL_H
#if defined(__linux__) || defined(__FreeBSD__) || \
defined(__FreeBSD_kernel__) || defined(__DragonFly__)
#define HAVE_CHARDEV_PARPORT 1
#endif
#endif /* CHAR_PARALLEL_H */

191
chardev/char-pipe.c Normal file
View File

@@ -0,0 +1,191 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/char.h"
#ifdef _WIN32
#include "char-win.h"
#else
#include "char-fd.h"
#endif
#ifdef _WIN32
#define MAXCONNECT 1
#define NTIMEOUT 5000
static int win_chr_pipe_init(Chardev *chr, const char *filename,
Error **errp)
{
WinChardev *s = WIN_CHARDEV(chr);
OVERLAPPED ov;
int ret;
DWORD size;
char *openname;
s->fpipe = TRUE;
s->hsend = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!s->hsend) {
error_setg(errp, "Failed CreateEvent");
goto fail;
}
s->hrecv = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!s->hrecv) {
error_setg(errp, "Failed CreateEvent");
goto fail;
}
openname = g_strdup_printf("\\\\.\\pipe\\%s", filename);
s->hcom = CreateNamedPipe(openname,
PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_BYTE | PIPE_READMODE_BYTE |
PIPE_WAIT,
MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL);
g_free(openname);
if (s->hcom == INVALID_HANDLE_VALUE) {
error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError());
s->hcom = NULL;
goto fail;
}
ZeroMemory(&ov, sizeof(ov));
ov.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
ret = ConnectNamedPipe(s->hcom, &ov);
if (ret) {
error_setg(errp, "Failed ConnectNamedPipe");
goto fail;
}
ret = GetOverlappedResult(s->hcom, &ov, &size, TRUE);
if (!ret) {
error_setg(errp, "Failed GetOverlappedResult");
if (ov.hEvent) {
CloseHandle(ov.hEvent);
ov.hEvent = NULL;
}
goto fail;
}
if (ov.hEvent) {
CloseHandle(ov.hEvent);
ov.hEvent = NULL;
}
qemu_add_polling_cb(win_chr_pipe_poll, chr);
return 0;
fail:
return -1;
}
static void qemu_chr_open_pipe(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevHostdev *opts = backend->u.pipe.data;
const char *filename = opts->device;
if (win_chr_pipe_init(chr, filename, errp) < 0) {
return;
}
}
#else
static void qemu_chr_open_pipe(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevHostdev *opts = backend->u.pipe.data;
int fd_in, fd_out;
char *filename_in;
char *filename_out;
const char *filename = opts->device;
filename_in = g_strdup_printf("%s.in", filename);
filename_out = g_strdup_printf("%s.out", filename);
TFR(fd_in = qemu_open(filename_in, O_RDWR | O_BINARY));
TFR(fd_out = qemu_open(filename_out, O_RDWR | O_BINARY));
g_free(filename_in);
g_free(filename_out);
if (fd_in < 0 || fd_out < 0) {
if (fd_in >= 0) {
close(fd_in);
}
if (fd_out >= 0) {
close(fd_out);
}
TFR(fd_in = fd_out = qemu_open(filename, O_RDWR | O_BINARY));
if (fd_in < 0) {
error_setg_file_open(errp, errno, filename);
return;
}
}
qemu_chr_open_fd(chr, fd_in, fd_out);
}
#endif /* !_WIN32 */
static void qemu_chr_parse_pipe(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
const char *device = qemu_opt_get(opts, "path");
ChardevHostdev *dev;
if (device == NULL) {
error_setg(errp, "chardev: pipe: no device path given");
return;
}
backend->type = CHARDEV_BACKEND_KIND_PIPE;
dev = backend->u.pipe.data = g_new0(ChardevHostdev, 1);
qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(dev));
dev->device = g_strdup(device);
}
static void char_pipe_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_pipe;
cc->open = qemu_chr_open_pipe;
}
static const TypeInfo char_pipe_type_info = {
.name = TYPE_CHARDEV_PIPE,
#ifdef _WIN32
.parent = TYPE_CHARDEV_WIN,
#else
.parent = TYPE_CHARDEV_FD,
#endif
.class_init = char_pipe_class_init,
};
static void register_types(void)
{
type_register_static(&char_pipe_type_info);
}
type_init(register_types);

300
chardev/char-pty.c Normal file
View File

@@ -0,0 +1,300 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "io/channel-file.h"
#include "qemu/sockets.h"
#include "qemu/error-report.h"
#include "char-io.h"
#if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
|| defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
|| defined(__GLIBC__)
typedef struct {
Chardev parent;
QIOChannel *ioc;
int read_bytes;
/* Protected by the Chardev chr_write_lock. */
int connected;
guint timer_tag;
guint open_tag;
} PtyChardev;
#define PTY_CHARDEV(obj) OBJECT_CHECK(PtyChardev, (obj), TYPE_CHARDEV_PTY)
static void pty_chr_update_read_handler_locked(Chardev *chr);
static void pty_chr_state(Chardev *chr, int connected);
static gboolean pty_chr_timer(gpointer opaque)
{
struct Chardev *chr = CHARDEV(opaque);
PtyChardev *s = PTY_CHARDEV(opaque);
qemu_mutex_lock(&chr->chr_write_lock);
s->timer_tag = 0;
s->open_tag = 0;
if (!s->connected) {
/* Next poll ... */
pty_chr_update_read_handler_locked(chr);
}
qemu_mutex_unlock(&chr->chr_write_lock);
return FALSE;
}
/* Called with chr_write_lock held. */
static void pty_chr_rearm_timer(Chardev *chr, int ms)
{
PtyChardev *s = PTY_CHARDEV(chr);
char *name;
if (s->timer_tag) {
g_source_remove(s->timer_tag);
s->timer_tag = 0;
}
if (ms == 1000) {
name = g_strdup_printf("pty-timer-secs-%s", chr->label);
s->timer_tag = g_timeout_add_seconds(1, pty_chr_timer, chr);
} else {
name = g_strdup_printf("pty-timer-ms-%s", chr->label);
s->timer_tag = g_timeout_add(ms, pty_chr_timer, chr);
}
g_source_set_name_by_id(s->timer_tag, name);
g_free(name);
}
/* Called with chr_write_lock held. */
static void pty_chr_update_read_handler_locked(Chardev *chr)
{
PtyChardev *s = PTY_CHARDEV(chr);
GPollFD pfd;
int rc;
QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc);
pfd.fd = fioc->fd;
pfd.events = G_IO_OUT;
pfd.revents = 0;
do {
rc = g_poll(&pfd, 1, 0);
} while (rc == -1 && errno == EINTR);
assert(rc >= 0);
if (pfd.revents & G_IO_HUP) {
pty_chr_state(chr, 0);
} else {
pty_chr_state(chr, 1);
}
}
static void pty_chr_update_read_handler(Chardev *chr,
GMainContext *context)
{
qemu_mutex_lock(&chr->chr_write_lock);
pty_chr_update_read_handler_locked(chr);
qemu_mutex_unlock(&chr->chr_write_lock);
}
/* Called with chr_write_lock held. */
static int char_pty_chr_write(Chardev *chr, const uint8_t *buf, int len)
{
PtyChardev *s = PTY_CHARDEV(chr);
if (!s->connected) {
/* guest sends data, check for (re-)connect */
pty_chr_update_read_handler_locked(chr);
if (!s->connected) {
return len;
}
}
return io_channel_send(s->ioc, buf, len);
}
static GSource *pty_chr_add_watch(Chardev *chr, GIOCondition cond)
{
PtyChardev *s = PTY_CHARDEV(chr);
if (!s->connected) {
return NULL;
}
return qio_channel_create_watch(s->ioc, cond);
}
static int pty_chr_read_poll(void *opaque)
{
Chardev *chr = CHARDEV(opaque);
PtyChardev *s = PTY_CHARDEV(opaque);
s->read_bytes = qemu_chr_be_can_write(chr);
return s->read_bytes;
}
static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
{
Chardev *chr = CHARDEV(opaque);
PtyChardev *s = PTY_CHARDEV(opaque);
gsize len;
uint8_t buf[CHR_READ_BUF_LEN];
ssize_t ret;
len = sizeof(buf);
if (len > s->read_bytes) {
len = s->read_bytes;
}
if (len == 0) {
return TRUE;
}
ret = qio_channel_read(s->ioc, (char *)buf, len, NULL);
if (ret <= 0) {
pty_chr_state(chr, 0);
return FALSE;
} else {
pty_chr_state(chr, 1);
qemu_chr_be_write(chr, buf, ret);
}
return TRUE;
}
static gboolean qemu_chr_be_generic_open_func(gpointer opaque)
{
Chardev *chr = CHARDEV(opaque);
PtyChardev *s = PTY_CHARDEV(opaque);
s->open_tag = 0;
qemu_chr_be_generic_open(chr);
return FALSE;
}
/* Called with chr_write_lock held. */
static void pty_chr_state(Chardev *chr, int connected)
{
PtyChardev *s = PTY_CHARDEV(chr);
if (!connected) {
if (s->open_tag) {
g_source_remove(s->open_tag);
s->open_tag = 0;
}
remove_fd_in_watch(chr);
s->connected = 0;
/* (re-)connect poll interval for idle guests: once per second.
* We check more frequently in case the guests sends data to
* the virtual device linked to our pty. */
pty_chr_rearm_timer(chr, 1000);
} else {
if (s->timer_tag) {
g_source_remove(s->timer_tag);
s->timer_tag = 0;
}
if (!s->connected) {
g_assert(s->open_tag == 0);
s->connected = 1;
s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr);
}
if (!chr->fd_in_tag) {
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
pty_chr_read_poll,
pty_chr_read,
chr, NULL);
}
}
}
static void char_pty_finalize(Object *obj)
{
Chardev *chr = CHARDEV(obj);
PtyChardev *s = PTY_CHARDEV(obj);
qemu_mutex_lock(&chr->chr_write_lock);
pty_chr_state(chr, 0);
object_unref(OBJECT(s->ioc));
if (s->timer_tag) {
g_source_remove(s->timer_tag);
s->timer_tag = 0;
}
qemu_mutex_unlock(&chr->chr_write_lock);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
static void char_pty_open(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
PtyChardev *s;
int master_fd, slave_fd;
char pty_name[PATH_MAX];
char *name;
master_fd = qemu_openpty_raw(&slave_fd, pty_name);
if (master_fd < 0) {
error_setg_errno(errp, errno, "Failed to create PTY");
return;
}
close(slave_fd);
qemu_set_nonblock(master_fd);
chr->filename = g_strdup_printf("pty:%s", pty_name);
error_report("char device redirected to %s (label %s)",
pty_name, chr->label);
s = PTY_CHARDEV(chr);
s->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd));
name = g_strdup_printf("chardev-pty-%s", chr->label);
qio_channel_set_name(QIO_CHANNEL(s->ioc), name);
g_free(name);
s->timer_tag = 0;
*be_opened = false;
}
static void char_pty_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->open = char_pty_open;
cc->chr_write = char_pty_chr_write;
cc->chr_update_read_handler = pty_chr_update_read_handler;
cc->chr_add_watch = pty_chr_add_watch;
}
static const TypeInfo char_pty_type_info = {
.name = TYPE_CHARDEV_PTY,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(PtyChardev),
.instance_finalize = char_pty_finalize,
.class_init = char_pty_class_init,
};
static void register_types(void)
{
type_register_static(&char_pty_type_info);
}
type_init(register_types);
#endif

249
chardev/char-ringbuf.c Normal file
View File

@@ -0,0 +1,249 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "sysemu/char.h"
#include "qmp-commands.h"
#include "qemu/base64.h"
/* Ring buffer chardev */
typedef struct {
Chardev parent;
size_t size;
size_t prod;
size_t cons;
uint8_t *cbuf;
} RingBufChardev;
#define RINGBUF_CHARDEV(obj) \
OBJECT_CHECK(RingBufChardev, (obj), TYPE_CHARDEV_RINGBUF)
static size_t ringbuf_count(const Chardev *chr)
{
const RingBufChardev *d = RINGBUF_CHARDEV(chr);
return d->prod - d->cons;
}
static int ringbuf_chr_write(Chardev *chr, const uint8_t *buf, int len)
{
RingBufChardev *d = RINGBUF_CHARDEV(chr);
int i;
if (!buf || (len < 0)) {
return -1;
}
for (i = 0; i < len; i++) {
d->cbuf[d->prod++ & (d->size - 1)] = buf[i];
if (d->prod - d->cons > d->size) {
d->cons = d->prod - d->size;
}
}
return len;
}
static int ringbuf_chr_read(Chardev *chr, uint8_t *buf, int len)
{
RingBufChardev *d = RINGBUF_CHARDEV(chr);
int i;
qemu_mutex_lock(&chr->chr_write_lock);
for (i = 0; i < len && d->cons != d->prod; i++) {
buf[i] = d->cbuf[d->cons++ & (d->size - 1)];
}
qemu_mutex_unlock(&chr->chr_write_lock);
return i;
}
static void char_ringbuf_finalize(Object *obj)
{
RingBufChardev *d = RINGBUF_CHARDEV(obj);
g_free(d->cbuf);
}
static void qemu_chr_open_ringbuf(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevRingbuf *opts = backend->u.ringbuf.data;
RingBufChardev *d = RINGBUF_CHARDEV(chr);
d->size = opts->has_size ? opts->size : 65536;
/* The size must be power of 2 */
if (d->size & (d->size - 1)) {
error_setg(errp, "size of ringbuf chardev must be power of two");
return;
}
d->prod = 0;
d->cons = 0;
d->cbuf = g_malloc0(d->size);
}
void qmp_ringbuf_write(const char *device, const char *data,
bool has_format, enum DataFormat format,
Error **errp)
{
Chardev *chr;
const uint8_t *write_data;
int ret;
gsize write_count;
chr = qemu_chr_find(device);
if (!chr) {
error_setg(errp, "Device '%s' not found", device);
return;
}
if (!CHARDEV_IS_RINGBUF(chr)) {
error_setg(errp, "%s is not a ringbuf device", device);
return;
}
if (has_format && (format == DATA_FORMAT_BASE64)) {
write_data = qbase64_decode(data, -1,
&write_count,
errp);
if (!write_data) {
return;
}
} else {
write_data = (uint8_t *)data;
write_count = strlen(data);
}
ret = ringbuf_chr_write(chr, write_data, write_count);
if (write_data != (uint8_t *)data) {
g_free((void *)write_data);
}
if (ret < 0) {
error_setg(errp, "Failed to write to device %s", device);
return;
}
}
char *qmp_ringbuf_read(const char *device, int64_t size,
bool has_format, enum DataFormat format,
Error **errp)
{
Chardev *chr;
uint8_t *read_data;
size_t count;
char *data;
chr = qemu_chr_find(device);
if (!chr) {
error_setg(errp, "Device '%s' not found", device);
return NULL;
}
if (!CHARDEV_IS_RINGBUF(chr)) {
error_setg(errp, "%s is not a ringbuf device", device);
return NULL;
}
if (size <= 0) {
error_setg(errp, "size must be greater than zero");
return NULL;
}
count = ringbuf_count(chr);
size = size > count ? count : size;
read_data = g_malloc(size + 1);
ringbuf_chr_read(chr, read_data, size);
if (has_format && (format == DATA_FORMAT_BASE64)) {
data = g_base64_encode(read_data, size);
g_free(read_data);
} else {
/*
* FIXME should read only complete, valid UTF-8 characters up
* to @size bytes. Invalid sequences should be replaced by a
* suitable replacement character. Except when (and only
* when) ring buffer lost characters since last read, initial
* continuation characters should be dropped.
*/
read_data[size] = 0;
data = (char *)read_data;
}
return data;
}
static void qemu_chr_parse_ringbuf(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
int val;
ChardevRingbuf *ringbuf;
backend->type = CHARDEV_BACKEND_KIND_RINGBUF;
ringbuf = backend->u.ringbuf.data = g_new0(ChardevRingbuf, 1);
qemu_chr_parse_common(opts, qapi_ChardevRingbuf_base(ringbuf));
val = qemu_opt_get_size(opts, "size", 0);
if (val != 0) {
ringbuf->has_size = true;
ringbuf->size = val;
}
}
static void char_ringbuf_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_ringbuf;
cc->open = qemu_chr_open_ringbuf;
cc->chr_write = ringbuf_chr_write;
}
static const TypeInfo char_ringbuf_type_info = {
.name = TYPE_CHARDEV_RINGBUF,
.parent = TYPE_CHARDEV,
.class_init = char_ringbuf_class_init,
.instance_size = sizeof(RingBufChardev),
.instance_finalize = char_ringbuf_finalize,
};
/* Bug-compatibility: */
static const TypeInfo char_memory_type_info = {
.name = TYPE_CHARDEV_MEMORY,
.parent = TYPE_CHARDEV_RINGBUF,
};
static void register_types(void)
{
type_register_static(&char_ringbuf_type_info);
type_register_static(&char_memory_type_info);
}
type_init(register_types);

318
chardev/char-serial.c Normal file
View File

@@ -0,0 +1,318 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/sockets.h"
#include "io/channel-file.h"
#include "qapi/error.h"
#ifdef _WIN32
#include "char-win.h"
#else
#include <sys/ioctl.h>
#include <termios.h>
#include "char-fd.h"
#endif
#include "char-serial.h"
#ifdef _WIN32
static void qmp_chardev_open_serial(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevHostdev *serial = backend->u.serial.data;
win_chr_init(chr, serial->device, errp);
}
#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
|| defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
|| defined(__GLIBC__)
static void tty_serial_init(int fd, int speed,
int parity, int data_bits, int stop_bits)
{
struct termios tty;
speed_t spd;
#if 0
printf("tty_serial_init: speed=%d parity=%c data=%d stop=%d\n",
speed, parity, data_bits, stop_bits);
#endif
tcgetattr(fd, &tty);
#define check_speed(val) if (speed <= val) { spd = B##val; break; }
speed = speed * 10 / 11;
do {
check_speed(50);
check_speed(75);
check_speed(110);
check_speed(134);
check_speed(150);
check_speed(200);
check_speed(300);
check_speed(600);
check_speed(1200);
check_speed(1800);
check_speed(2400);
check_speed(4800);
check_speed(9600);
check_speed(19200);
check_speed(38400);
/* Non-Posix values follow. They may be unsupported on some systems. */
check_speed(57600);
check_speed(115200);
#ifdef B230400
check_speed(230400);
#endif
#ifdef B460800
check_speed(460800);
#endif
#ifdef B500000
check_speed(500000);
#endif
#ifdef B576000
check_speed(576000);
#endif
#ifdef B921600
check_speed(921600);
#endif
#ifdef B1000000
check_speed(1000000);
#endif
#ifdef B1152000
check_speed(1152000);
#endif
#ifdef B1500000
check_speed(1500000);
#endif
#ifdef B2000000
check_speed(2000000);
#endif
#ifdef B2500000
check_speed(2500000);
#endif
#ifdef B3000000
check_speed(3000000);
#endif
#ifdef B3500000
check_speed(3500000);
#endif
#ifdef B4000000
check_speed(4000000);
#endif
spd = B115200;
} while (0);
cfsetispeed(&tty, spd);
cfsetospeed(&tty, spd);
tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
| INLCR | IGNCR | ICRNL | IXON);
tty.c_oflag |= OPOST;
tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN | ISIG);
tty.c_cflag &= ~(CSIZE | PARENB | PARODD | CRTSCTS | CSTOPB);
switch (data_bits) {
default:
case 8:
tty.c_cflag |= CS8;
break;
case 7:
tty.c_cflag |= CS7;
break;
case 6:
tty.c_cflag |= CS6;
break;
case 5:
tty.c_cflag |= CS5;
break;
}
switch (parity) {
default:
case 'N':
break;
case 'E':
tty.c_cflag |= PARENB;
break;
case 'O':
tty.c_cflag |= PARENB | PARODD;
break;
}
if (stop_bits == 2) {
tty.c_cflag |= CSTOPB;
}
tcsetattr(fd, TCSANOW, &tty);
}
static int tty_serial_ioctl(Chardev *chr, int cmd, void *arg)
{
FDChardev *s = FD_CHARDEV(chr);
QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc_in);
switch (cmd) {
case CHR_IOCTL_SERIAL_SET_PARAMS:
{
QEMUSerialSetParams *ssp = arg;
tty_serial_init(fioc->fd,
ssp->speed, ssp->parity,
ssp->data_bits, ssp->stop_bits);
}
break;
case CHR_IOCTL_SERIAL_SET_BREAK:
{
int enable = *(int *)arg;
if (enable) {
tcsendbreak(fioc->fd, 1);
}
}
break;
case CHR_IOCTL_SERIAL_GET_TIOCM:
{
int sarg = 0;
int *targ = (int *)arg;
ioctl(fioc->fd, TIOCMGET, &sarg);
*targ = 0;
if (sarg & TIOCM_CTS) {
*targ |= CHR_TIOCM_CTS;
}
if (sarg & TIOCM_CAR) {
*targ |= CHR_TIOCM_CAR;
}
if (sarg & TIOCM_DSR) {
*targ |= CHR_TIOCM_DSR;
}
if (sarg & TIOCM_RI) {
*targ |= CHR_TIOCM_RI;
}
if (sarg & TIOCM_DTR) {
*targ |= CHR_TIOCM_DTR;
}
if (sarg & TIOCM_RTS) {
*targ |= CHR_TIOCM_RTS;
}
}
break;
case CHR_IOCTL_SERIAL_SET_TIOCM:
{
int sarg = *(int *)arg;
int targ = 0;
ioctl(fioc->fd, TIOCMGET, &targ);
targ &= ~(CHR_TIOCM_CTS | CHR_TIOCM_CAR | CHR_TIOCM_DSR
| CHR_TIOCM_RI | CHR_TIOCM_DTR | CHR_TIOCM_RTS);
if (sarg & CHR_TIOCM_CTS) {
targ |= TIOCM_CTS;
}
if (sarg & CHR_TIOCM_CAR) {
targ |= TIOCM_CAR;
}
if (sarg & CHR_TIOCM_DSR) {
targ |= TIOCM_DSR;
}
if (sarg & CHR_TIOCM_RI) {
targ |= TIOCM_RI;
}
if (sarg & CHR_TIOCM_DTR) {
targ |= TIOCM_DTR;
}
if (sarg & CHR_TIOCM_RTS) {
targ |= TIOCM_RTS;
}
ioctl(fioc->fd, TIOCMSET, &targ);
}
break;
default:
return -ENOTSUP;
}
return 0;
}
static void qmp_chardev_open_serial(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevHostdev *serial = backend->u.serial.data;
int fd;
fd = qmp_chardev_open_file_source(serial->device, O_RDWR, errp);
if (fd < 0) {
return;
}
qemu_set_nonblock(fd);
tty_serial_init(fd, 115200, 'N', 8, 1);
qemu_chr_open_fd(chr, fd, fd);
}
#endif /* __linux__ || __sun__ */
#ifdef HAVE_CHARDEV_SERIAL
static void qemu_chr_parse_serial(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
const char *device = qemu_opt_get(opts, "path");
ChardevHostdev *serial;
if (device == NULL) {
error_setg(errp, "chardev: serial/tty: no device path given");
return;
}
backend->type = CHARDEV_BACKEND_KIND_SERIAL;
serial = backend->u.serial.data = g_new0(ChardevHostdev, 1);
qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(serial));
serial->device = g_strdup(device);
}
static void char_serial_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_serial;
cc->open = qmp_chardev_open_serial;
#ifndef _WIN32
cc->chr_ioctl = tty_serial_ioctl;
#endif
}
static const TypeInfo char_serial_type_info = {
.name = TYPE_CHARDEV_SERIAL,
#ifdef _WIN32
.parent = TYPE_CHARDEV_WIN,
#else
.parent = TYPE_CHARDEV_FD,
#endif
.class_init = char_serial_class_init,
};
static void register_types(void)
{
type_register_static(&char_serial_type_info);
}
type_init(register_types);
#endif

35
chardev/char-serial.h Normal file
View File

@@ -0,0 +1,35 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CHAR_SERIAL_H
#define CHAR_SERIAL_H
#ifdef _WIN32
#define HAVE_CHARDEV_SERIAL 1
#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
|| defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
|| defined(__GLIBC__)
#define HAVE_CHARDEV_SERIAL 1
#endif
#endif

1027
chardev/char-socket.c Normal file

File diff suppressed because it is too large Load Diff

164
chardev/char-stdio.c Normal file
View File

@@ -0,0 +1,164 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/sockets.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#ifdef _WIN32
#include "char-win.h"
#include "char-win-stdio.h"
#else
#include <termios.h>
#include "char-fd.h"
#endif
#ifndef _WIN32
/* init terminal so that we can grab keys */
static struct termios oldtty;
static int old_fd0_flags;
static bool stdio_in_use;
static bool stdio_allow_signal;
static bool stdio_echo_state;
static void term_exit(void)
{
tcsetattr(0, TCSANOW, &oldtty);
fcntl(0, F_SETFL, old_fd0_flags);
}
static void qemu_chr_set_echo_stdio(Chardev *chr, bool echo)
{
struct termios tty;
stdio_echo_state = echo;
tty = oldtty;
if (!echo) {
tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
| INLCR | IGNCR | ICRNL | IXON);
tty.c_oflag |= OPOST;
tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
tty.c_cflag &= ~(CSIZE | PARENB);
tty.c_cflag |= CS8;
tty.c_cc[VMIN] = 1;
tty.c_cc[VTIME] = 0;
}
if (!stdio_allow_signal) {
tty.c_lflag &= ~ISIG;
}
tcsetattr(0, TCSANOW, &tty);
}
static void term_stdio_handler(int sig)
{
/* restore echo after resume from suspend. */
qemu_chr_set_echo_stdio(NULL, stdio_echo_state);
}
static void qemu_chr_open_stdio(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevStdio *opts = backend->u.stdio.data;
struct sigaction act;
if (is_daemonized()) {
error_setg(errp, "cannot use stdio with -daemonize");
return;
}
if (stdio_in_use) {
error_setg(errp, "cannot use stdio by multiple character devices");
return;
}
stdio_in_use = true;
old_fd0_flags = fcntl(0, F_GETFL);
tcgetattr(0, &oldtty);
qemu_set_nonblock(0);
atexit(term_exit);
memset(&act, 0, sizeof(act));
act.sa_handler = term_stdio_handler;
sigaction(SIGCONT, &act, NULL);
qemu_chr_open_fd(chr, 0, 1);
if (opts->has_signal) {
stdio_allow_signal = opts->signal;
}
qemu_chr_set_echo_stdio(chr, false);
}
#endif
static void qemu_chr_parse_stdio(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
ChardevStdio *stdio;
backend->type = CHARDEV_BACKEND_KIND_STDIO;
stdio = backend->u.stdio.data = g_new0(ChardevStdio, 1);
qemu_chr_parse_common(opts, qapi_ChardevStdio_base(stdio));
stdio->has_signal = true;
stdio->signal = qemu_opt_get_bool(opts, "signal", true);
}
static void char_stdio_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_stdio;
#ifndef _WIN32
cc->open = qemu_chr_open_stdio;
cc->chr_set_echo = qemu_chr_set_echo_stdio;
#endif
}
static void char_stdio_finalize(Object *obj)
{
#ifndef _WIN32
term_exit();
#endif
}
static const TypeInfo char_stdio_type_info = {
.name = TYPE_CHARDEV_STDIO,
#ifdef _WIN32
.parent = TYPE_CHARDEV_WIN_STDIO,
#else
.parent = TYPE_CHARDEV_FD,
#endif
.instance_finalize = char_stdio_finalize,
.class_init = char_stdio_class_init,
};
static void register_types(void)
{
type_register_static(&char_stdio_type_info);
}
type_init(register_types);

233
chardev/char-udp.c Normal file
View File

@@ -0,0 +1,233 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "sysemu/char.h"
#include "io/channel-socket.h"
#include "qapi/error.h"
#include "char-io.h"
/***********************************************************/
/* UDP Net console */
typedef struct {
Chardev parent;
QIOChannel *ioc;
uint8_t buf[CHR_READ_BUF_LEN];
int bufcnt;
int bufptr;
int max_size;
} UdpChardev;
#define UDP_CHARDEV(obj) OBJECT_CHECK(UdpChardev, (obj), TYPE_CHARDEV_UDP)
/* Called with chr_write_lock held. */
static int udp_chr_write(Chardev *chr, const uint8_t *buf, int len)
{
UdpChardev *s = UDP_CHARDEV(chr);
return qio_channel_write(
s->ioc, (const char *)buf, len, NULL);
}
static int udp_chr_read_poll(void *opaque)
{
Chardev *chr = CHARDEV(opaque);
UdpChardev *s = UDP_CHARDEV(opaque);
s->max_size = qemu_chr_be_can_write(chr);
/* If there were any stray characters in the queue process them
* first
*/
while (s->max_size > 0 && s->bufptr < s->bufcnt) {
qemu_chr_be_write(chr, &s->buf[s->bufptr], 1);
s->bufptr++;
s->max_size = qemu_chr_be_can_write(chr);
}
return s->max_size;
}
static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
{
Chardev *chr = CHARDEV(opaque);
UdpChardev *s = UDP_CHARDEV(opaque);
ssize_t ret;
if (s->max_size == 0) {
return TRUE;
}
ret = qio_channel_read(
s->ioc, (char *)s->buf, sizeof(s->buf), NULL);
if (ret <= 0) {
remove_fd_in_watch(chr);
return FALSE;
}
s->bufcnt = ret;
s->bufptr = 0;
while (s->max_size > 0 && s->bufptr < s->bufcnt) {
qemu_chr_be_write(chr, &s->buf[s->bufptr], 1);
s->bufptr++;
s->max_size = qemu_chr_be_can_write(chr);
}
return TRUE;
}
static void udp_chr_update_read_handler(Chardev *chr,
GMainContext *context)
{
UdpChardev *s = UDP_CHARDEV(chr);
remove_fd_in_watch(chr);
if (s->ioc) {
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
udp_chr_read_poll,
udp_chr_read, chr,
context);
}
}
static void char_udp_finalize(Object *obj)
{
Chardev *chr = CHARDEV(obj);
UdpChardev *s = UDP_CHARDEV(obj);
remove_fd_in_watch(chr);
if (s->ioc) {
object_unref(OBJECT(s->ioc));
}
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend,
Error **errp)
{
const char *host = qemu_opt_get(opts, "host");
const char *port = qemu_opt_get(opts, "port");
const char *localaddr = qemu_opt_get(opts, "localaddr");
const char *localport = qemu_opt_get(opts, "localport");
bool has_local = false;
SocketAddress *addr;
ChardevUdp *udp;
backend->type = CHARDEV_BACKEND_KIND_UDP;
if (host == NULL || strlen(host) == 0) {
host = "localhost";
}
if (port == NULL || strlen(port) == 0) {
error_setg(errp, "chardev: udp: remote port not specified");
return;
}
if (localport == NULL || strlen(localport) == 0) {
localport = "0";
} else {
has_local = true;
}
if (localaddr == NULL || strlen(localaddr) == 0) {
localaddr = "";
} else {
has_local = true;
}
udp = backend->u.udp.data = g_new0(ChardevUdp, 1);
qemu_chr_parse_common(opts, qapi_ChardevUdp_base(udp));
addr = g_new0(SocketAddress, 1);
addr->type = SOCKET_ADDRESS_KIND_INET;
addr->u.inet.data = g_new(InetSocketAddress, 1);
*addr->u.inet.data = (InetSocketAddress) {
.host = g_strdup(host),
.port = g_strdup(port),
.has_ipv4 = qemu_opt_get(opts, "ipv4"),
.ipv4 = qemu_opt_get_bool(opts, "ipv4", 0),
.has_ipv6 = qemu_opt_get(opts, "ipv6"),
.ipv6 = qemu_opt_get_bool(opts, "ipv6", 0),
};
udp->remote = addr;
if (has_local) {
udp->has_local = true;
addr = g_new0(SocketAddress, 1);
addr->type = SOCKET_ADDRESS_KIND_INET;
addr->u.inet.data = g_new(InetSocketAddress, 1);
*addr->u.inet.data = (InetSocketAddress) {
.host = g_strdup(localaddr),
.port = g_strdup(localport),
};
udp->local = addr;
}
}
static void qmp_chardev_open_udp(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
ChardevUdp *udp = backend->u.udp.data;
QIOChannelSocket *sioc = qio_channel_socket_new();
char *name;
UdpChardev *s = UDP_CHARDEV(chr);
if (qio_channel_socket_dgram_sync(sioc,
udp->local, udp->remote,
errp) < 0) {
object_unref(OBJECT(sioc));
return;
}
name = g_strdup_printf("chardev-udp-%s", chr->label);
qio_channel_set_name(QIO_CHANNEL(sioc), name);
g_free(name);
s->ioc = QIO_CHANNEL(sioc);
/* be isn't opened until we get a connection */
*be_opened = false;
}
static void char_udp_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->parse = qemu_chr_parse_udp;
cc->open = qmp_chardev_open_udp;
cc->chr_write = udp_chr_write;
cc->chr_update_read_handler = udp_chr_update_read_handler;
}
static const TypeInfo char_udp_type_info = {
.name = TYPE_CHARDEV_UDP,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(UdpChardev),
.instance_finalize = char_udp_finalize,
.class_init = char_udp_class_init,
};
static void register_types(void)
{
type_register_static(&char_udp_type_info);
}
type_init(register_types);

266
chardev/char-win-stdio.c Normal file
View File

@@ -0,0 +1,266 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "char-win.h"
#include "char-win-stdio.h"
typedef struct {
Chardev parent;
HANDLE hStdIn;
HANDLE hInputReadyEvent;
HANDLE hInputDoneEvent;
HANDLE hInputThread;
uint8_t win_stdio_buf;
} WinStdioChardev;
#define WIN_STDIO_CHARDEV(obj) \
OBJECT_CHECK(WinStdioChardev, (obj), TYPE_CHARDEV_WIN_STDIO)
static void win_stdio_wait_func(void *opaque)
{
Chardev *chr = CHARDEV(opaque);
WinStdioChardev *stdio = WIN_STDIO_CHARDEV(opaque);
INPUT_RECORD buf[4];
int ret;
DWORD dwSize;
int i;
ret = ReadConsoleInput(stdio->hStdIn, buf, ARRAY_SIZE(buf), &dwSize);
if (!ret) {
/* Avoid error storm */
qemu_del_wait_object(stdio->hStdIn, NULL, NULL);
return;
}
for (i = 0; i < dwSize; i++) {
KEY_EVENT_RECORD *kev = &buf[i].Event.KeyEvent;
if (buf[i].EventType == KEY_EVENT && kev->bKeyDown) {
int j;
if (kev->uChar.AsciiChar != 0) {
for (j = 0; j < kev->wRepeatCount; j++) {
if (qemu_chr_be_can_write(chr)) {
uint8_t c = kev->uChar.AsciiChar;
qemu_chr_be_write(chr, &c, 1);
}
}
}
}
}
}
static DWORD WINAPI win_stdio_thread(LPVOID param)
{
WinStdioChardev *stdio = WIN_STDIO_CHARDEV(param);
int ret;
DWORD dwSize;
while (1) {
/* Wait for one byte */
ret = ReadFile(stdio->hStdIn, &stdio->win_stdio_buf, 1, &dwSize, NULL);
/* Exit in case of error, continue if nothing read */
if (!ret) {
break;
}
if (!dwSize) {
continue;
}
/* Some terminal emulator returns \r\n for Enter, just pass \n */
if (stdio->win_stdio_buf == '\r') {
continue;
}
/* Signal the main thread and wait until the byte was eaten */
if (!SetEvent(stdio->hInputReadyEvent)) {
break;
}
if (WaitForSingleObject(stdio->hInputDoneEvent, INFINITE)
!= WAIT_OBJECT_0) {
break;
}
}
qemu_del_wait_object(stdio->hInputReadyEvent, NULL, NULL);
return 0;
}
static void win_stdio_thread_wait_func(void *opaque)
{
Chardev *chr = CHARDEV(opaque);
WinStdioChardev *stdio = WIN_STDIO_CHARDEV(opaque);
if (qemu_chr_be_can_write(chr)) {
qemu_chr_be_write(chr, &stdio->win_stdio_buf, 1);
}
SetEvent(stdio->hInputDoneEvent);
}
static void qemu_chr_set_echo_win_stdio(Chardev *chr, bool echo)
{
WinStdioChardev *stdio = WIN_STDIO_CHARDEV(chr);
DWORD dwMode = 0;
GetConsoleMode(stdio->hStdIn, &dwMode);
if (echo) {
SetConsoleMode(stdio->hStdIn, dwMode | ENABLE_ECHO_INPUT);
} else {
SetConsoleMode(stdio->hStdIn, dwMode & ~ENABLE_ECHO_INPUT);
}
}
static void qemu_chr_open_stdio(Chardev *chr,
ChardevBackend *backend,
bool *be_opened,
Error **errp)
{
WinStdioChardev *stdio = WIN_STDIO_CHARDEV(chr);
DWORD dwMode;
int is_console = 0;
stdio->hStdIn = GetStdHandle(STD_INPUT_HANDLE);
if (stdio->hStdIn == INVALID_HANDLE_VALUE) {
error_setg(errp, "cannot open stdio: invalid handle");
return;
}
is_console = GetConsoleMode(stdio->hStdIn, &dwMode) != 0;
if (is_console) {
if (qemu_add_wait_object(stdio->hStdIn,
win_stdio_wait_func, chr)) {
error_setg(errp, "qemu_add_wait_object: failed");
goto err1;
}
} else {
DWORD dwId;
stdio->hInputReadyEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
stdio->hInputDoneEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
if (stdio->hInputReadyEvent == INVALID_HANDLE_VALUE
|| stdio->hInputDoneEvent == INVALID_HANDLE_VALUE) {
error_setg(errp, "cannot create event");
goto err2;
}
if (qemu_add_wait_object(stdio->hInputReadyEvent,
win_stdio_thread_wait_func, chr)) {
error_setg(errp, "qemu_add_wait_object: failed");
goto err2;
}
stdio->hInputThread = CreateThread(NULL, 0, win_stdio_thread,
chr, 0, &dwId);
if (stdio->hInputThread == INVALID_HANDLE_VALUE) {
error_setg(errp, "cannot create stdio thread");
goto err3;
}
}
dwMode |= ENABLE_LINE_INPUT;
if (is_console) {
/* set the terminal in raw mode */
/* ENABLE_QUICK_EDIT_MODE | ENABLE_EXTENDED_FLAGS */
dwMode |= ENABLE_PROCESSED_INPUT;
}
SetConsoleMode(stdio->hStdIn, dwMode);
qemu_chr_set_echo_win_stdio(chr, false);
return;
err3:
qemu_del_wait_object(stdio->hInputReadyEvent, NULL, NULL);
err2:
CloseHandle(stdio->hInputReadyEvent);
CloseHandle(stdio->hInputDoneEvent);
err1:
qemu_del_wait_object(stdio->hStdIn, NULL, NULL);
}
static void char_win_stdio_finalize(Object *obj)
{
WinStdioChardev *stdio = WIN_STDIO_CHARDEV(obj);
if (stdio->hInputReadyEvent != INVALID_HANDLE_VALUE) {
CloseHandle(stdio->hInputReadyEvent);
}
if (stdio->hInputDoneEvent != INVALID_HANDLE_VALUE) {
CloseHandle(stdio->hInputDoneEvent);
}
if (stdio->hInputThread != INVALID_HANDLE_VALUE) {
TerminateThread(stdio->hInputThread, 0);
}
}
static int win_stdio_write(Chardev *chr, const uint8_t *buf, int len)
{
HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
DWORD dwSize;
int len1;
len1 = len;
while (len1 > 0) {
if (!WriteFile(hStdOut, buf, len1, &dwSize, NULL)) {
break;
}
buf += dwSize;
len1 -= dwSize;
}
return len - len1;
}
static void char_win_stdio_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->open = qemu_chr_open_stdio;
cc->chr_write = win_stdio_write;
cc->chr_set_echo = qemu_chr_set_echo_win_stdio;
}
static const TypeInfo char_win_stdio_type_info = {
.name = TYPE_CHARDEV_WIN_STDIO,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(WinStdioChardev),
.instance_finalize = char_win_stdio_finalize,
.class_init = char_win_stdio_class_init,
.abstract = true,
};
static void register_types(void)
{
type_register_static(&char_win_stdio_type_info);
}
type_init(register_types);

29
chardev/char-win-stdio.h Normal file
View File

@@ -0,0 +1,29 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CHAR_WIN_STDIO_H
#define CHAR_WIN_STDIO_H
#define TYPE_CHARDEV_WIN_STDIO "chardev-win-stdio"
#endif /* CHAR_WIN_STDIO_H */

265
chardev/char-win.c Normal file
View File

@@ -0,0 +1,265 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qapi/error.h"
#include "char-win.h"
static void win_chr_readfile(Chardev *chr)
{
WinChardev *s = WIN_CHARDEV(chr);
int ret, err;
uint8_t buf[CHR_READ_BUF_LEN];
DWORD size;
ZeroMemory(&s->orecv, sizeof(s->orecv));
s->orecv.hEvent = s->hrecv;
ret = ReadFile(s->hcom, buf, s->len, &size, &s->orecv);
if (!ret) {
err = GetLastError();
if (err == ERROR_IO_PENDING) {
ret = GetOverlappedResult(s->hcom, &s->orecv, &size, TRUE);
}
}
if (size > 0) {
qemu_chr_be_write(chr, buf, size);
}
}
static void win_chr_read(Chardev *chr)
{
WinChardev *s = WIN_CHARDEV(chr);
if (s->len > s->max_size) {
s->len = s->max_size;
}
if (s->len == 0) {
return;
}
win_chr_readfile(chr);
}
static int win_chr_read_poll(Chardev *chr)
{
WinChardev *s = WIN_CHARDEV(chr);
s->max_size = qemu_chr_be_can_write(chr);
return s->max_size;
}
static int win_chr_poll(void *opaque)
{
Chardev *chr = CHARDEV(opaque);
WinChardev *s = WIN_CHARDEV(opaque);
COMSTAT status;
DWORD comerr;
ClearCommError(s->hcom, &comerr, &status);
if (status.cbInQue > 0) {
s->len = status.cbInQue;
win_chr_read_poll(chr);
win_chr_read(chr);
return 1;
}
return 0;
}
int win_chr_init(Chardev *chr, const char *filename, Error **errp)
{
WinChardev *s = WIN_CHARDEV(chr);
COMMCONFIG comcfg;
COMMTIMEOUTS cto = { 0, 0, 0, 0, 0};
COMSTAT comstat;
DWORD size;
DWORD err;
s->hsend = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!s->hsend) {
error_setg(errp, "Failed CreateEvent");
goto fail;
}
s->hrecv = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!s->hrecv) {
error_setg(errp, "Failed CreateEvent");
goto fail;
}
s->hcom = CreateFile(filename, GENERIC_READ | GENERIC_WRITE, 0, NULL,
OPEN_EXISTING, FILE_FLAG_OVERLAPPED, 0);
if (s->hcom == INVALID_HANDLE_VALUE) {
error_setg(errp, "Failed CreateFile (%lu)", GetLastError());
s->hcom = NULL;
goto fail;
}
if (!SetupComm(s->hcom, NRECVBUF, NSENDBUF)) {
error_setg(errp, "Failed SetupComm");
goto fail;
}
ZeroMemory(&comcfg, sizeof(COMMCONFIG));
size = sizeof(COMMCONFIG);
GetDefaultCommConfig(filename, &comcfg, &size);
comcfg.dcb.DCBlength = sizeof(DCB);
CommConfigDialog(filename, NULL, &comcfg);
if (!SetCommState(s->hcom, &comcfg.dcb)) {
error_setg(errp, "Failed SetCommState");
goto fail;
}
if (!SetCommMask(s->hcom, EV_ERR)) {
error_setg(errp, "Failed SetCommMask");
goto fail;
}
cto.ReadIntervalTimeout = MAXDWORD;
if (!SetCommTimeouts(s->hcom, &cto)) {
error_setg(errp, "Failed SetCommTimeouts");
goto fail;
}
if (!ClearCommError(s->hcom, &err, &comstat)) {
error_setg(errp, "Failed ClearCommError");
goto fail;
}
qemu_add_polling_cb(win_chr_poll, chr);
return 0;
fail:
return -1;
}
int win_chr_pipe_poll(void *opaque)
{
Chardev *chr = CHARDEV(opaque);
WinChardev *s = WIN_CHARDEV(opaque);
DWORD size;
PeekNamedPipe(s->hcom, NULL, 0, NULL, &size, NULL);
if (size > 0) {
s->len = size;
win_chr_read_poll(chr);
win_chr_read(chr);
return 1;
}
return 0;
}
/* Called with chr_write_lock held. */
static int win_chr_write(Chardev *chr, const uint8_t *buf, int len1)
{
WinChardev *s = WIN_CHARDEV(chr);
DWORD len, ret, size, err;
len = len1;
ZeroMemory(&s->osend, sizeof(s->osend));
s->osend.hEvent = s->hsend;
while (len > 0) {
if (s->hsend) {
ret = WriteFile(s->hcom, buf, len, &size, &s->osend);
} else {
ret = WriteFile(s->hcom, buf, len, &size, NULL);
}
if (!ret) {
err = GetLastError();
if (err == ERROR_IO_PENDING) {
ret = GetOverlappedResult(s->hcom, &s->osend, &size, TRUE);
if (ret) {
buf += size;
len -= size;
} else {
break;
}
} else {
break;
}
} else {
buf += size;
len -= size;
}
}
return len1 - len;
}
static void char_win_finalize(Object *obj)
{
Chardev *chr = CHARDEV(obj);
WinChardev *s = WIN_CHARDEV(chr);
if (s->skip_free) {
return;
}
if (s->hsend) {
CloseHandle(s->hsend);
}
if (s->hrecv) {
CloseHandle(s->hrecv);
}
if (s->hcom) {
CloseHandle(s->hcom);
}
if (s->fpipe) {
qemu_del_polling_cb(win_chr_pipe_poll, chr);
} else {
qemu_del_polling_cb(win_chr_poll, chr);
}
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
void qemu_chr_open_win_file(Chardev *chr, HANDLE fd_out)
{
WinChardev *s = WIN_CHARDEV(chr);
s->skip_free = true;
s->hcom = fd_out;
}
static void char_win_class_init(ObjectClass *oc, void *data)
{
ChardevClass *cc = CHARDEV_CLASS(oc);
cc->chr_write = win_chr_write;
}
static const TypeInfo char_win_type_info = {
.name = TYPE_CHARDEV_WIN,
.parent = TYPE_CHARDEV,
.instance_size = sizeof(WinChardev),
.instance_finalize = char_win_finalize,
.class_init = char_win_class_init,
.abstract = true,
};
static void register_types(void)
{
type_register_static(&char_win_type_info);
}
type_init(register_types);

53
chardev/char-win.h Normal file
View File

@@ -0,0 +1,53 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CHAR_WIN_H
#define CHAR_WIN_H
#include "sysemu/char.h"
typedef struct {
Chardev parent;
int max_size;
HANDLE hcom, hrecv, hsend;
OVERLAPPED orecv;
BOOL fpipe;
DWORD len;
/* Protected by the Chardev chr_write_lock. */
OVERLAPPED osend;
/* FIXME: file/console do not finalize */
bool skip_free;
} WinChardev;
#define NSENDBUF 2048
#define NRECVBUF 2048
#define TYPE_CHARDEV_WIN "chardev-win"
#define WIN_CHARDEV(obj) OBJECT_CHECK(WinChardev, (obj), TYPE_CHARDEV_WIN)
void qemu_chr_open_win_file(Chardev *chr, HANDLE fd_out);
int win_chr_init(Chardev *chr, const char *filename, Error **errp);
int win_chr_pipe_poll(void *opaque);
#endif /* CHAR_WIN_H */

1335
chardev/char.c Normal file

File diff suppressed because it is too large Load Diff

23
configure vendored
View File

@@ -1474,7 +1474,7 @@ fi
gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
gcc_flags="-Wno-missing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
gcc_flags="-Wno-initializer-overrides $gcc_flags"
gcc_flags="-Wno-string-plus-int $gcc_flags"
@@ -3378,7 +3378,7 @@ fi
fdt_required=no
for target in $target_list; do
case $target in
aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu)
aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
fdt_required=yes
;;
esac
@@ -3396,11 +3396,11 @@ fi
if test "$fdt" != "no" ; then
fdt_libs="-lfdt"
# explicitly check for libfdt_env.h as it is missing in some stable installs
# and test for required functions to make sure we are on a version >= 1.4.0
# and test for required functions to make sure we are on a version >= 1.4.2
cat > $TMPC << EOF
#include <libfdt.h>
#include <libfdt_env.h>
int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; }
int main(void) { fdt_first_subnode(0, 0); return 0; }
EOF
if compile_prog "" "$fdt_libs" ; then
# system DTC is good - use it
@@ -3418,7 +3418,7 @@ EOF
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# have neither and want - prompt for system/submodule install
error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \
error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:" \
" (1) Preferred: Install the DTC (libfdt) devel package" \
" (2) Fetch the DTC submodule, using:" \
" git submodule update --init dtc"
@@ -5843,7 +5843,7 @@ target_name=$(echo $target | cut -d '-' -f 1)
target_bigendian="no"
case "$target_name" in
armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or1k|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
target_bigendian=yes
;;
esac
@@ -5879,6 +5879,7 @@ mkdir -p $target_dir
echo "# Automatically generated by configure - do not modify" > $config_target_mak
bflt="no"
mttcg="no"
interp_prefix1=$(echo "$interp_prefix" | sed "s/%M/$target_name/g")
gdb_xml_files=""
@@ -5893,15 +5894,18 @@ case "$target_name" in
TARGET_BASE_ARCH=i386
;;
alpha)
mttcg="yes"
;;
arm|armeb)
TARGET_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
aarch64)
TARGET_BASE_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
cris)
@@ -5937,7 +5941,7 @@ case "$target_name" in
;;
nios2)
;;
or32)
or1k)
TARGET_ARCH=openrisc
TARGET_BASE_ARCH=openrisc
;;
@@ -6066,6 +6070,9 @@ if test "$target_bigendian" = "yes" ; then
fi
if test "$target_softmmu" = "yes" ; then
echo "CONFIG_SOFTMMU=y" >> $config_target_mak
if test "$mttcg" = "yes" ; then
echo "TARGET_SUPPORTS_MTTCG=y" >> $config_target_mak
fi
fi
if test "$target_user_only" = "yes" ; then
echo "CONFIG_USER_ONLY=y" >> $config_target_mak
@@ -6145,7 +6152,7 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
nios2)
disas_config "NIOS2"
;;
or32)
or1k)
disas_config "OPENRISC"
;;
ppc*)

View File

@@ -23,9 +23,6 @@
#include "exec/exec-all.h"
#include "exec/memory-internal.h"
bool exit_request;
CPUState *tcg_current_cpu;
/* exit the current TB, but without causing any exception to be raised */
void cpu_loop_exit_noexc(CPUState *cpu)
{

View File

@@ -18,7 +18,7 @@
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "trace.h"
#include "trace-root.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg.h"
@@ -29,6 +29,7 @@
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
@@ -227,20 +228,43 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
static void cpu_exec_step(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
tb_phys_invalidate(tb, -1);
tb_free(tb);
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
mmap_lock();
tb_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
tb_unlock();
mmap_unlock();
cc->cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
cc->cpu_exec_exit(cpu);
tb_lock();
tb_phys_invalidate(tb, -1);
tb_free(tb);
tb_unlock();
} else {
/* We may have exited due to another problem here, so we need
* to reset any tb_locks we may have taken but didn't release.
* The mmap_lock is dropped by tb_gen_code if it runs out of
* memory.
*/
#ifndef CONFIG_SOFTMMU
tcg_debug_assert(!have_mmap_lock());
#endif
tb_lock_reset();
}
}
void cpu_exec_step_atomic(CPUState *cpu)
@@ -384,12 +408,13 @@ static inline bool cpu_handle_halt(CPUState *cpu)
if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
&& replay_interrupt()) {
X86CPU *x86_cpu = X86_CPU(cpu);
qemu_mutex_lock_iothread();
apic_poll_irq(x86_cpu->apic_state);
cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
qemu_mutex_unlock_iothread();
}
#endif
if (!cpu_has_work(cpu)) {
current_cpu = NULL;
return true;
}
@@ -439,7 +464,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
#else
if (replay_exception()) {
CPUClass *cc = CPU_GET_CLASS(cpu);
qemu_mutex_lock_iothread();
cc->do_interrupt(cpu);
qemu_mutex_unlock_iothread();
cpu->exception_index = -1;
} else if (!replay_has_interrupt()) {
/* give a chance to iothread in replay mode */
@@ -461,13 +488,15 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
return false;
}
static inline void cpu_handle_interrupt(CPUState *cpu,
static inline bool cpu_handle_interrupt(CPUState *cpu,
TranslationBlock **last_tb)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
int interrupt_request = cpu->interrupt_request;
if (unlikely(interrupt_request)) {
if (unlikely(atomic_read(&cpu->interrupt_request))) {
int interrupt_request;
qemu_mutex_lock_iothread();
interrupt_request = cpu->interrupt_request;
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
/* Mask out external interrupts for this step. */
interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@@ -475,7 +504,8 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
if (interrupt_request & CPU_INTERRUPT_DEBUG) {
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
cpu->exception_index = EXCP_DEBUG;
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
/* Do nothing */
@@ -484,23 +514,26 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
cpu->halted = 1;
cpu->exception_index = EXCP_HLT;
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#if defined(TARGET_I386)
else if (interrupt_request & CPU_INTERRUPT_INIT) {
X86CPU *x86_cpu = X86_CPU(cpu);
CPUArchState *env = &x86_cpu->env;
replay_interrupt();
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0);
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
do_cpu_init(x86_cpu);
cpu->exception_index = EXCP_HALTED;
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#else
else if (interrupt_request & CPU_INTERRUPT_RESET) {
replay_interrupt();
cpu_reset(cpu);
cpu_loop_exit(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#endif
/* The target hook has 3 exit conditions:
@@ -522,12 +555,19 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
the program flow was changed */
*last_tb = NULL;
}
/* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
qemu_mutex_unlock_iothread();
}
if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
atomic_set(&cpu->exit_request, 0);
cpu->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cpu);
return true;
}
return false;
}
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
@@ -542,21 +582,19 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
trace_exec_tb(tb, tb->pc);
ret = cpu_tb_exec(cpu, tb);
*last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
*tb_exit = ret & TB_EXIT_MASK;
switch (*tb_exit) {
case TB_EXIT_REQUESTED:
/* Something asked us to stop executing
* chained TBs; just continue round the main
* loop. Whatever requested the exit will also
* have set something else (eg exit_request or
* interrupt_request) which we will handle
* next time around the loop. But we need to
* ensure the tcg_exit_req read in generated code
* comes before the next read of cpu->exit_request
* or cpu->interrupt_request.
/* Something asked us to stop executing chained TBs; just
* continue round the main loop. Whatever requested the exit
* will also have set something else (eg interrupt_request)
* which we will handle next time around the loop. But we
* need to ensure the tcg_exit_req read in generated code
* comes before the next read of cpu->exit_request or
* cpu->interrupt_request.
*/
smp_rmb();
smp_mb();
*last_tb = NULL;
break;
case TB_EXIT_ICOUNT_EXPIRED:
@@ -566,6 +604,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
abort();
#else
int insns_left = cpu->icount_decr.u32;
*last_tb = NULL;
if (cpu->icount_extra && insns_left >= 0) {
/* Refill decrementer and continue execution. */
cpu->icount_extra += insns_left;
@@ -575,17 +614,17 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
} else {
if (insns_left > 0) {
/* Execute remaining instructions. */
cpu_exec_nocache(cpu, insns_left, *last_tb, false);
cpu_exec_nocache(cpu, insns_left, tb, false);
align_clocks(sc, cpu);
}
cpu->exception_index = EXCP_INTERRUPT;
*last_tb = NULL;
cpu_loop_exit(cpu);
}
break;
#endif
}
default:
*last_tb = tb;
break;
}
}
@@ -605,13 +644,8 @@ int cpu_exec(CPUState *cpu)
return EXCP_HALTED;
}
atomic_mb_set(&tcg_current_cpu, cpu);
rcu_read_lock();
if (unlikely(atomic_mb_read(&exit_request))) {
cpu->exit_request = 1;
}
cc->cpu_exec_enter(cpu);
/* Calculate difference between guest clock and host clock.
@@ -621,50 +655,43 @@ int cpu_exec(CPUState *cpu)
*/
init_delay_params(&sc, cpu);
for(;;) {
/* prepare setjmp context for exception handling */
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
TranslationBlock *tb, *last_tb = NULL;
int tb_exit = 0;
/* if an exception is pending, we execute it here */
if (cpu_handle_exception(cpu, &ret)) {
break;
}
for(;;) {
cpu_handle_interrupt(cpu, &last_tb);
tb = tb_find(cpu, last_tb, tb_exit);
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);
} /* for(;;) */
} else {
/* prepare setjmp context for exception handling */
if (sigsetjmp(cpu->jmp_env, 0) != 0) {
#if defined(__clang__) || !QEMU_GNUC_PREREQ(4, 6)
/* Some compilers wrongly smash all local variables after
* siglongjmp. There were bug reports for gcc 4.5.0 and clang.
* Reload essential local variables here for those compilers.
* Newer versions of gcc would complain about this code (-Wclobbered). */
cpu = current_cpu;
cc = CPU_GET_CLASS(cpu);
/* Some compilers wrongly smash all local variables after
* siglongjmp. There were bug reports for gcc 4.5.0 and clang.
* Reload essential local variables here for those compilers.
* Newer versions of gcc would complain about this code (-Wclobbered). */
cpu = current_cpu;
cc = CPU_GET_CLASS(cpu);
#else /* buggy compiler */
/* Assert that the compiler does not smash local variables. */
g_assert(cpu == current_cpu);
g_assert(cc == CPU_GET_CLASS(cpu));
/* Assert that the compiler does not smash local variables. */
g_assert(cpu == current_cpu);
g_assert(cc == CPU_GET_CLASS(cpu));
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
cpu->can_do_io = 1;
tb_lock_reset();
if (qemu_mutex_iothread_locked()) {
qemu_mutex_unlock_iothread();
}
} /* for(;;) */
}
/* if an exception is pending, we execute it here */
while (!cpu_handle_exception(cpu, &ret)) {
TranslationBlock *last_tb = NULL;
int tb_exit = 0;
while (!cpu_handle_interrupt(cpu, &last_tb)) {
TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit);
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);
}
}
cc->cpu_exec_exit(cpu);
rcu_read_unlock();
/* fail safe : never use current_cpu outside cpu_exec() */
current_cpu = NULL;
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_cpu, NULL);
return ret;
}

387
cpus.c
View File

@@ -25,6 +25,7 @@
/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
@@ -45,6 +46,7 @@
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
@@ -150,6 +152,77 @@ typedef struct TimersState {
} TimersState;
static TimersState timers_state;
bool mttcg_enabled;
/*
* We default to false if we know other options have been enabled
* which are currently incompatible with MTTCG. Otherwise when each
* guest (target) has been updated to support:
* - atomic instructions
* - memory ordering primitives (barriers)
* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
*
* Once a guest architecture has been converted to the new primitives
* there are two remaining limitations to check.
*
* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
* - The host must have a stronger memory order than the guest
*
* It may be possible in future to support strong guests on weak hosts
* but that will require tagging all load/stores in a guest with their
* implicit memory order requirements which would likely slow things
* down a lot.
*/
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
return false;
#endif
}
static bool default_mttcg_enabled(void)
{
QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
const char *rr = qemu_opt_get(icount_opts, "rr");
if (rr || TCG_OVERSIZED_GUEST) {
return false;
} else {
#ifdef TARGET_SUPPORTS_MTTCG
return check_tcg_memory_orders_compatible();
#else
return false;
#endif
}
}
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
const char *t = qemu_opt_get(opts, "thread");
if (t) {
if (strcmp(t, "multi") == 0) {
if (TCG_OVERSIZED_GUEST) {
error_setg(errp, "No MTTCG when guest word size > hosts");
} else {
if (!check_tcg_memory_orders_compatible()) {
error_report("Guest expects a stronger memory ordering "
"than the host provides");
error_printf("This may cause strange/hard to debug errors");
}
mttcg_enabled = true;
}
} else if (strcmp(t, "single") == 0) {
mttcg_enabled = false;
} else {
error_setg(errp, "Invalid 'thread' setting %s", t);
}
} else {
mttcg_enabled = default_mttcg_enabled();
}
}
int64_t cpu_get_icount_raw(void)
{
@@ -694,6 +767,63 @@ void configure_icount(QemuOpts *opts, Error **errp)
NANOSECONDS_PER_SECOND / 10);
}
/***********************************************************/
/* TCG vCPU kick timer
*
* The kick timer is responsible for moving single threaded vCPU
* emulation on to the next vCPU. If more than one vCPU is running a
* timer event with force a cpu->exit so the next vCPU can get
* scheduled.
*
* The timer is removed if all vCPUs are idle and restarted again once
* idleness is complete.
*/
static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
static inline int64_t qemu_tcg_next_kick(void)
{
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
CPUState *cpu;
do {
cpu = atomic_mb_read(&tcg_current_rr_cpu);
if (cpu) {
cpu_exit(cpu);
}
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
static void kick_tcg_thread(void *opaque)
{
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
qemu_cpu_kick_rr_cpu();
}
static void start_tcg_kick_timer(void)
{
if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
kick_tcg_thread, NULL);
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
}
}
static void stop_tcg_kick_timer(void)
{
if (tcg_kick_vcpu_timer) {
timer_del(tcg_kick_vcpu_timer);
tcg_kick_vcpu_timer = NULL;
}
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
@@ -896,8 +1026,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;
static QemuThread io_thread;
@@ -911,7 +1039,6 @@ void qemu_init_cpu_loop(void)
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
@@ -936,28 +1063,34 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
static void qemu_wait_io_event_common(CPUState *cpu)
{
atomic_mb_set(&cpu->thread_kicked, false);
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
process_queued_cpu_work(cpu);
cpu->thread_kicked = false;
}
static bool qemu_tcg_should_sleep(CPUState *cpu)
{
if (mttcg_enabled) {
return cpu_thread_is_idle(cpu);
} else {
return all_cpu_threads_idle();
}
}
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
while (all_cpu_threads_idle()) {
while (qemu_tcg_should_sleep(cpu)) {
stop_tcg_kick_timer();
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
while (iothread_requesting_mutex) {
qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
}
start_tcg_kick_timer();
CPU_FOREACH(cpu) {
qemu_wait_io_event_common(cpu);
}
qemu_wait_io_event_common(cpu);
}
static void qemu_kvm_wait_io_event(CPUState *cpu)
@@ -1028,6 +1161,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
current_cpu = cpu;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
@@ -1036,9 +1170,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
current_cpu = cpu;
while (1) {
current_cpu = NULL;
qemu_mutex_unlock_iothread();
do {
int sig;
@@ -1049,7 +1181,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
exit(1);
}
qemu_mutex_lock_iothread();
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
@@ -1115,9 +1246,11 @@ static int tcg_cpu_exec(CPUState *cpu)
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
@@ -1150,7 +1283,16 @@ static void deal_with_unplugged_cpus(void)
}
}
static void *qemu_tcg_cpu_thread_fn(void *arg)
/* Single-threaded TCG
*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
* This is done explicitly rather than relying on side-effects
* elsewhere.
*/
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
@@ -1172,15 +1314,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
/* process any pending work */
CPU_FOREACH(cpu) {
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
}
/* process any pending work */
atomic_mb_set(&exit_request, 1);
start_tcg_kick_timer();
cpu = first_cpu;
/* process any pending work */
cpu->exit_request = 1;
while (1) {
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
@@ -1189,7 +1334,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
cpu = first_cpu;
}
for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
atomic_mb_set(&tcg_current_rr_cpu, cpu);
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
@@ -1200,22 +1348,32 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
} else if (r == EXCP_ATOMIC) {
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
break;
}
} else if (cpu->stop || cpu->stopped) {
} else if (cpu->stop) {
if (cpu->unplug) {
cpu = CPU_NEXT(cpu);
}
break;
}
} /* for cpu.. */
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
/* Pairs with smp_wmb in qemu_cpu_kick. */
atomic_mb_set(&exit_request, 0);
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_rr_cpu, NULL);
if (cpu && cpu->exit_request) {
atomic_mb_set(&cpu->exit_request, 0);
}
handle_icount_deadline();
qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
deal_with_unplugged_cpus();
}
@@ -1262,6 +1420,68 @@ static void CALLBACK dummy_apc_func(ULONG_PTR unused)
}
#endif
/* Multi-threaded TCG
*
* In the multi-threaded case each vCPU has its own thread. The TLS
* variable current_cpu can be used deep in the code to find the
* current CPUState for a given thread.
*/
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->can_do_io = 1;
current_cpu = cpu;
qemu_cond_signal(&qemu_cpu_cond);
/* process any pending work */
cpu->exit_request = 1;
while (1) {
if (cpu_can_run(cpu)) {
int r;
r = tcg_cpu_exec(cpu);
switch (r) {
case EXCP_DEBUG:
cpu_handle_guest_debug(cpu);
break;
case EXCP_HALTED:
/* during start-up the vCPU is reset and the thread is
* kicked several times. If we don't ensure we go back
* to sleep in the halted state we won't cleanly
* start-up when the vCPU is enabled.
*
* cpu->halted should ensure we sleep in wait_io_event
*/
g_assert(cpu->halted);
break;
case EXCP_ATOMIC:
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
default:
/* Ignore everything else? */
break;
}
}
handle_icount_deadline();
atomic_mb_set(&cpu->exit_request, 0);
qemu_tcg_wait_io_event(cpu);
}
return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
@@ -1287,24 +1507,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
#endif
}
static void qemu_cpu_kick_no_halt(void)
{
CPUState *cpu;
/* Ensure whatever caused the exit has reached the CPU threads before
* writing exit_request.
*/
atomic_mb_set(&exit_request, 1);
cpu = atomic_mb_read(&tcg_current_cpu);
if (cpu) {
cpu_exit(cpu);
}
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
qemu_cpu_kick_no_halt();
cpu_exit(cpu);
/* NOP unless doing single-thread RR */
qemu_cpu_kick_rr_cpu();
} else {
if (hax_enabled()) {
/*
@@ -1342,27 +1551,14 @@ bool qemu_mutex_iothread_locked(void)
void qemu_mutex_lock_iothread(void)
{
atomic_inc(&iothread_requesting_mutex);
/* In the simple case there is no need to bump the VCPU thread out of
* TCG code execution.
*/
if (!tcg_enabled() || qemu_in_vcpu_thread() ||
!first_cpu || !first_cpu->created) {
qemu_mutex_lock(&qemu_global_mutex);
atomic_dec(&iothread_requesting_mutex);
} else {
if (qemu_mutex_trylock(&qemu_global_mutex)) {
qemu_cpu_kick_no_halt();
qemu_mutex_lock(&qemu_global_mutex);
}
atomic_dec(&iothread_requesting_mutex);
qemu_cond_broadcast(&qemu_io_proceeded_cond);
}
g_assert(!qemu_mutex_iothread_locked());
qemu_mutex_lock(&qemu_global_mutex);
iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
@@ -1392,13 +1588,6 @@ void pause_all_vcpus(void)
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
if (!kvm_enabled()) {
CPU_FOREACH(cpu) {
cpu->stop = false;
cpu->stopped = true;
}
return;
}
}
while (!all_vcpus_paused()) {
@@ -1447,29 +1636,43 @@ void cpu_remove_sync(CPUState *cpu)
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
static QemuCond *tcg_halt_cond;
static QemuThread *tcg_cpu_thread;
static QemuCond *single_tcg_halt_cond;
static QemuThread *single_tcg_cpu_thread;
/* share a single thread for all cpus with TCG */
if (!tcg_cpu_thread) {
if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
tcg_halt_cond = cpu->halt_cond;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
if (qemu_tcg_mttcg_enabled()) {
/* create a thread per vCPU with TCG (MTTCG) */
parallel_cpus = true;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
} else {
/* share a single thread for all cpus with TCG */
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
qemu_thread_create(cpu->thread, thread_name,
qemu_tcg_rr_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
single_tcg_halt_cond = cpu->halt_cond;
single_tcg_cpu_thread = cpu->thread;
}
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
tcg_cpu_thread = cpu->thread;
} else {
cpu->thread = tcg_cpu_thread;
cpu->halt_cond = tcg_halt_cond;
/* For non-MTTCG cases we share the thread */
cpu->thread = single_tcg_cpu_thread;
cpu->halt_cond = single_tcg_halt_cond;
}
}
@@ -1578,6 +1781,48 @@ int vm_stop(RunState state)
return do_vm_stop(state);
}
/**
* Prepare for (re)starting the VM.
* Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
* running or in case of an error condition), 0 otherwise.
*/
int vm_prepare_start(void)
{
RunState requested;
int res = 0;
qemu_vmstop_requested(&requested);
if (runstate_is_running() && requested == RUN_STATE__MAX) {
return -1;
}
/* Ensure that a STOP/RESUME pair of events is emitted if a
* vmstop request was pending. The BLOCK_IO_ERROR event, for
* example, according to documentation is always followed by
* the STOP event.
*/
if (runstate_is_running()) {
qapi_event_send_stop(&error_abort);
res = -1;
} else {
replay_enable_events();
cpu_enable_ticks();
runstate_set(RUN_STATE_RUNNING);
vm_state_notify(1, RUN_STATE_RUNNING);
}
/* We are sending this now, but the CPUs will be resumed shortly later */
qapi_event_send_resume(&error_abort);
return res;
}
void vm_start(void)
{
if (!vm_prepare_start()) {
resume_all_vcpus();
}
}
/* does a state transition even if the VM is already stopped,
current state is forgotten forever */
int vm_stop_force_state(RunState state)

Some files were not shown because too many files have changed in this diff Show More