Compare commits

...

245 Commits

Author SHA1 Message Date
David Gibson
125a9cb8e3 Update dtc submodule to v1.4.3
Since the last submodule update (which was v1.4.2) dtc and libfdt have
gained some features which would be useful in qemu.  There's now a v1.4.3
upstream release, so update our submodule to point to it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 17:25:32 +11:00
Alexey Kardashevskiy
a438fa121f pseries: Update SLOF firmware image
Various fixes in this update; the full list is:

  > qemu-bootlist: Take the "-boot strict=off" setting properly into account
  > virtio-scsi: initialize vring avail queue buffers
  > virtio: Remove global variables in block and 9p driver
  > Remove superfluous checkpoints in tree.fs
  > Provide "write" function in the disk-label package
  > virtio: Implement block write support
  > scsi: Add SCSI block write support
  > deblocker: Add a 'write' function
  > virtio-scsi: Fix descriptor order for SCSI WRITE commands
  > board-qemu: Add a possibility to use hvterm input instead of USB keyboard
  > Do not try to use virtio-gpu in VGA mode
  > virtio: Fix stack comment of virtio-blk-read
  > envvar: Do not read default values for /options from the NVRAM anymore
  > envvar: Set properties in /options during "(set-defaults)"

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 17:25:32 +11:00
Peter Maydell
ecb24d334a Merge remote-tracking branch 'remotes/rth/tags/pull-tgt-20170302' into staging
Queued sparc patch

# gpg: Signature made Wed 01 Mar 2017 19:53:21 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tgt-20170302:
  target/sparc: Restore ldstub of odd asis

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 22:06:41 +00:00
Peter Maydell
6835504887 Merge remote-tracking branch 'remotes/kraxel/tags/pull-audio-20170301-1' into staging
audio: replay support, sdl2 fix.

# gpg: Signature made Wed 01 Mar 2017 15:38:09 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-audio-20170301-1:
  audio/sdlaudio: Allow audio playback with SDL2
  audio: make audio poll timer deterministic
  replay: add record/replay for audio passthrough

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 20:31:49 +00:00
Peter Maydell
b49d31a05a Merge remote-tracking branch 'remotes/kraxel/tags/pull-docs-20170301-1' into staging
docs: update sample configuration files

# gpg: Signature made Wed 01 Mar 2017 13:43:34 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-docs-20170301-1:
  mach-virt: Provide sample configuration files
  q35: Improve sample configuration files

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 19:27:30 +00:00
Peter Maydell
251501a371 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170228a' into staging
Migration pull

Note: The 'postcopy: Update userfaultfd.h header' patch is part of
Paolo's header update and will disappear if this is applied after it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Tue 28 Feb 2017 12:38:34 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20170228a: (27 commits)
  postcopy: Add extra check for COPY function
  postcopy: Add doc about hugepages and postcopy
  postcopy: Check for userfault+hugepage feature
  postcopy: Update userfaultfd.h header
  postcopy: Allow hugepages
  postcopy: Send whole huge pages
  postcopy: Mask fault addresses to huge page boundary
  postcopy: Load huge pages in one go
  postcopy: Use temporary for placing zero huge pages
  postcopy: Plumb pagesize down into place helpers
  postcopy: Record largest page size
  postcopy: enhance ram_block_discard_range for hugepages
  exec: ram_block_discard_range
  postcopy: Chunk discards for hugepages
  postcopy: Transmit and compare individual page sizes
  postcopy: Transmit ram size summary word
  migration: fix use-after-free of to_dst_file
  migration: Update docs to discourage version bumps
  migration: fix id leak regression
  migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 17:39:12 +00:00
Peter Maydell
c9fc677a35 Merge remote-tracking branch 'remotes/elmarco/tags/leak-pull-request' into staging
# gpg: Signature made Wed 01 Mar 2017 09:02:53 GMT
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/leak-pull-request: (28 commits)
  tests: fix virtio-blk-test leaks
  tests: add specialized device_find function
  tests: fix usb-test leaks
  tests: allows to run single test in usb-hcd-ehci-test
  usb: release the created buses
  bus: do not unref hotplug handler
  tests: fix virtio-9p-test leaks
  tests: fix virtio-scsi-test leak
  tests: fix e1000e leaks
  tests: fix i440fx-test leaks
  tests: fix e1000-test leak
  tests: fix tco-test leaks
  tests: fix eepro100-test leak
  pc: pcihp: avoid adding ACPI_PCIHP_PROP_BSEL twice
  tests: fix ipmi-bt-test leak
  tests: fix ipmi-kcs-test leak
  tests: fix bios-tables-test leak
  tests: fix hd-geo-test leaks
  tests: fix ide-test leaks
  tests: fix vhost-user-test leaks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 15:25:37 +00:00
Peter Maydell
ab711e216b Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170301' into staging
ppc patch queue for 2017-03-01

I was hoping to get this pull request squeezed in before the soft
freeze, but I ran into some difficulties during testing.  Everything
here was at least posted before the soft freeze, so I'm hoping we can
still merge it for 2.9.

The biggest things here are:
    * Cleanups to handling of hashed page tables, that will make
      adding support for the POWER9 MMU easier
    * Cleanups to the XICS interrupt controller that will make
      implementing the powernv machine easier
    * TCG implementation of extended overflow and carry handling for
      POWER9

It also includes:
    * Increasing the CPU limit for pseries to 1024 vCPUs
    * Generating proper OF node names in qemu (making hotplug and
      coldplug logic closer together)

# gpg: Signature made Wed 01 Mar 2017 04:43:06 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170301: (50 commits)
  Add PowerPC 32-bit guest memory dump support
  ppc/xics: rename 'ICPState *' variables to 'icp'
  ppc/xics: move InterruptStatsProvider to the sPAPR machine
  ppc/xics: move ics-simple post_load under the machine
  ppc/xics: remove the XICSState classes
  ppc/xics: export the XICS init routines
  ppc/xics: move the ICP array under the sPAPR machine
  ppc/xics: register the reset handler of ICP objects
  ppc/xics: simplify spapr_dt_xics() interface
  ppc/xics: use the QOM interface to grab an ICP
  ppc/xics: move the cpu_setup() handler under the ICPState class
  ppc/xics: simplify the cpu_setup() handler
  ppc/xics: move kernel_xics_fd out of KVMXICSState
  ppc/xics: extend the QOM interface to handle ICPs
  ppc/xics: remove the XICS list of ICS
  ppc/xics: register the reset handler of ICS objects
  ppc/xics: remove xics_find_source()
  ppc/xics: use the QOM interface to resend irqs
  ppc/xics: use the QOM interface to get irqs
  ppc/xics: use the QOM interface under the sPAPR machine
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 13:50:55 +00:00
Peter Maydell
4bc0d39a2f Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Tue 28 Feb 2017 22:09:11 GMT
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to 0cd97cc built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 12:30:11 +00:00
Peter Maydell
666095c852 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 queue, 2017-02-27

"-cpu max" and query-cpu-model-expansion support for x86. This
should be the last x86 pull request before 2.9 soft freeze.

# gpg: Signature made Mon 27 Feb 2017 16:24:15 GMT
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  i386: Improve query-cpu-model-expansion full mode
  i386: Implement query-cpu-model-expansion QMP command
  i386: Define static "base" CPU model
  i386: Don't set CPUClass::cpu_def on "max" model
  i386: Make "max" model not use any host CPUID info on TCG
  i386: Create "max" CPU model
  qapi-schema: Comment about full expansion of non-migration-safe models
  i386: Reorganize and document CPUID initialization steps
  i386: Rename X86CPU::host_features to X86CPU::max_features
  i386: Add ordering field to CPUClass
  i386: Unset cannot_destroy_with_object_finalize_yet on "host" model

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 11:18:01 +00:00
Peter Maydell
f1d640524d Merge remote-tracking branch 'remotes/kraxel/tags/pull-seabios-20170228-1' into staging
seabios: update to 1.10.2 release

# gpg: Signature made Tue 28 Feb 2017 08:57:57 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-seabios-20170228-1:
  seabios: update to 1.10.2 release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 10:17:45 +00:00
Peter Maydell
d377b80338 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170301' into staging
Queued TCG patch

# gpg: Signature made Tue 28 Feb 2017 21:30:32 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170301:
  aarch64: Change ext type to TCGType to fix warnings

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 08:35:14 +00:00
Peter Maydell
b9fe31392b Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Tue 28 Feb 2017 20:35:32 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (46 commits)
  block: Add Error parameter to bdrv_append()
  block: Add Error parameter to bdrv_set_backing_hd()
  block: Assertions for resize permission
  block: Assertions for write permissions
  block: Pass BdrvChild to bdrv_aligned_preadv/pwritev and copy-on-read
  tests: Remove FIXME comments
  nbd/server: Use real permissions for NBD exports
  migration/block: Use real permissions
  hmp: Request permissions in qemu-io
  commit: Add filter-node-name to block-commit
  mirror: Add filter-node-name to blockdev-mirror
  stream: Use real permissions in streaming block job
  mirror: Use real permissions in mirror/active commit block job
  blockjob: Factor out block_job_remove_all_bdrv()
  block: Allow backing file links in change_parent_backing_link()
  block: BdrvChildRole.attach/detach() callbacks
  block: Fix pending requests check in bdrv_append()
  backup: Use real permissions in backup block job
  commit: Use real permissions for HMP 'commit'
  commit: Use real permissions in commit block job
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 23:09:46 +00:00
Peter Maydell
1e0addb682 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20170228-tag' into staging
Xen 2017/02/28

# gpg: Signature made Tue 28 Feb 2017 19:13:08 GMT
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <sstabellini@kernel.org>"
# gpg:                 aka "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20170228-tag:
  Add a new qmp command to do checkpoint, query xen replication status
  Add a new qmp command to start/stop replication

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 20:33:47 +00:00
Richard Henderson
3db010c339 target/sparc: Restore ldstub of odd asis
Fixes the booting of ss20 roms.

Cc: qemu-stable@nongnu.org
Reported-by: Michael Russo <mike@papersolve.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-03-02 06:52:43 +11:00
Peter Maydell
b28f9db1a7 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170228-1' into staging
target-arm queue:
 * raspi2: add gpio controller and sdhost controller, with
   the wiring so the guest can switch which controller the
   SD card is attached to
   (this is sufficient to get raspbian kernels to boot)
 * GICv3: support state save/restore from KVM
 * update Linux headers to 4.11
 * refactor and QOMify the ARMv7M container object

# gpg: Signature made Tue 28 Feb 2017 17:11:49 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170228-1: (21 commits)
  bcm2835: add sdhost and gpio controllers
  bcm2835_gpio: add bcm2835 gpio controller
  hw/sd: add card-reparenting function
  qdev: Have qdev_set_parent_bus() handle devices already on a bus
  hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers
  target-arm: Add GICv3CPUState in CPUARMState struct
  hw/intc/arm_gicv3_kvm: Implement get/put functions
  hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
  update Linux headers to 4.11
  update-linux-headers: update for 4.11
  stm32f205: Rename 'nvic' local to 'armv7m'
  stm32f205: Create armv7m object without using armv7m_init()
  armv7m: Split systick out from NVIC
  armv7m: Don't put core v7M devices under CONFIG_STELLARIS
  armv7m: Make bitband device take the address space to access
  armv7m: Make NVIC expose a memory region rather than mapping itself
  armv7m: Make ARMv7M object take memory region link
  armv7m: Use QOMified armv7m object in armv7m_init()
  armv7m: QOMify the armv7m container
  armv7m: Move NVICState struct definition into header
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 17:58:54 +00:00
Thomas Huth
bcf19777df audio/sdlaudio: Allow audio playback with SDL2
When compiling with SDL2, the semaphore trick used in sdlaudio.c
does not work - QEMU locks up completely in this case. To avoid
the hang and get at least some audio playback up and running (it's
a little bit crackling, but better than nothing), we can use the
SDL locking functions SDL_LockAudio() and SDL_UnlockAudio() to sync
with the sound playback thread instead.
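
As a rough, self-contained sketch of the locking pattern described above
(this is not the actual sdlaudio.c change; the buffer and helper names
are hypothetical):

    #include <SDL.h>
    #include <string.h>

    /* Data shared between the QEMU side and SDL's playback thread
     * (hypothetical buffer, for illustration only). */
    static Uint8 mixed_buf[4096];
    static int   mixed_len;

    /* Called from QEMU: protect the shared buffer with SDL's audio
     * lock instead of the semaphore trick used with SDL 1.2. */
    static void push_samples(const Uint8 *src, int len)
    {
        SDL_LockAudio();
        memcpy(mixed_buf, src, len);
        mixed_len = len;
        SDL_UnlockAudio();
    }

    /* SDL invokes this callback from its playback thread; SDL holds
     * the same lock around the callback, so accesses are serialized. */
    static void sdl_callback(void *opaque, Uint8 *stream, int len)
    {
        int n = mixed_len < len ? mixed_len : len;
        memcpy(stream, mixed_buf, n);
        mixed_len -= n;
    }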

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1485852398-2327-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
1ffc266539 audio: make audio poll timer deterministic
This patch changes the resetting strategy of the audio polling timer:
it does not change the expiration time if the timer is already set.
This is needed to make the timer deterministic and to enable execution
record/replay for audio devices.

audio_reset_timer is used in the function audio_vm_change_state_handler,
so every time the VM is stopped or restarted the timer would be reset
to a new timeout. The virtual clock does not advance while the VM is
stopped, so there is no need to reset the timeout when the VM restarts.
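
A hedged sketch of the difference (the helper below is hypothetical, not
audio_reset_timer() itself, and assumes QEMU's timer API): timer_mod()
always rewrites the deadline, while timer_mod_anticipate() only moves it
earlier or arms an unarmed timer, so a VM stop/restart cannot postpone
the poll:

    #include "qemu/osdep.h"
    #include "qemu/timer.h"

    static void rearm_poll_timer(QEMUTimer *t, int64_t period_ns)
    {
        int64_t next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + period_ns;

        /* Unlike timer_mod(), this leaves an already-armed, earlier
         * deadline untouched, keeping the expiry deterministic. */
        timer_mod_anticipate(t, next);
    }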

v2: updated commit message
v3: now using timer_mod_anticipate function (as suggested by Yurii Zubrytskyi)

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170214071510.6112.76764.stgit@PASHA-ISP
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
3d4d16f4dc replay: add record/replay for audio passthrough
This patch adds recording and replaying of audio data. It saves synchronization
information for audio output and for input from the microphone.

v2: removed unneeded whitespace change

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170202055054.4848.94901.stgit@PASHA-ISP.lan02.inno

[ kraxel: add qemu/error-report.h include to fix osx build failure ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:11:44 +01:00
Peter Maydell
7287e3556f Merge remote-tracking branch 'remotes/gkurz/tags/cve-2016-9602-for-upstream' into staging
This pull request has all the fixes for CVE-2016-9602, so that it can
be easily picked up by downstreams, as suggested by Michael Tokarev.

# gpg: Signature made Tue 28 Feb 2017 10:21:32 GMT
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/cve-2016-9602-for-upstream: (28 commits)
  9pfs: local: drop unused code
  9pfs: local: open2: don't follow symlinks
  9pfs: local: mkdir: don't follow symlinks
  9pfs: local: mknod: don't follow symlinks
  9pfs: local: symlink: don't follow symlinks
  9pfs: local: chown: don't follow symlinks
  9pfs: local: chmod: don't follow symlinks
  9pfs: local: link: don't follow symlinks
  9pfs: local: improve error handling in link op
  9pfs: local: rename: use renameat
  9pfs: local: renameat: don't follow symlinks
  9pfs: local: lstat: don't follow symlinks
  9pfs: local: readlink: don't follow symlinks
  9pfs: local: truncate: don't follow symlinks
  9pfs: local: statfs: don't follow symlinks
  9pfs: local: utimensat: don't follow symlinks
  9pfs: local: remove: don't follow symlinks
  9pfs: local: unlinkat: don't follow symlinks
  9pfs: local: lremovexattr: don't follow symlinks
  9pfs: local: lsetxattr: don't follow symlinks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 13:53:20 +00:00
Peter Maydell
e3280ffbf5 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Tue 28 Feb 2017 12:40:00 GMT
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  .shippable: add s390x-cross target
  new: dockerfiles/debian-s390-cross

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 13:06:00 +00:00
Marc-André Lureau
80e1eea37a tests: fix virtio-blk-test leaks
Use qvirtio_pci_device_find_slot() to avoid leaking the non-hotplugged
device. Add assert() to avoid further leaks in the future.

Use qvirtio_pci_device_free() to correctly free QVirtioPCIDevice.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:58:57 +04:00
Marc-André Lureau
c4523aae06 tests: add specialized device_find function
Allow specifying which slot to look for the device in.

This will be used in the following patch to avoid leaking when multiple
devices exist and we want to look up the hotplugged one.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:57:04 +04:00
Marc-André Lureau
62030ed135 tests: fix usb-test leaks
Fix the usb tests leaks.

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
d3510ff9d7 tests: allows to run single test in usb-hcd-ehci-test
pci_init() shouldn't be a test function, but should instead be called before
any test. This allows running a single test with -p /x86_64/ehci/....

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
cd7bc87868 usb: release the created buses
Leaks spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
675f22c6d3 bus: do not unref hotplug handler
Apparently, none of the bus owners gives a reference to the hotplug
handler property, so do not unref it on bus release.

Furthermore, a bus is allowed to be its own hotplug handler, as can
be seen in the qbus_set_bus_hotplug_handler() function. However, in this
case, the reference can't be given to the property, or this would create
a cyclic dependency and the bus would never be freed.

Each bus owner should manage the lifecycle of the hotplug handler.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
2b880bcdbe tests: fix virtio-9p-test leaks
Spotted by ASAN.

Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
3caab54d08 tests: fix virtio-scsi-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
448fe3c134 tests: fix e1000e leaks
Spotted by ASAN.

This hunk adds an assertion. It checks that we're finding no more than
one e1000e device: each hit allocates, but there is only one g_free().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
1bab33ab4a tests: fix i440fx-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
8829e16f5e tests: fix e1000-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
34779e8c39 tests: fix tco-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:51:25 +04:00
Marc-André Lureau
6afff1ffa3 tests: fix eepro100-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
2017-03-01 11:51:05 +04:00
Igor Mammedov
f0c9d64a68 pc: pcihp: avoid adding ACPI_PCIHP_PROP_BSEL twice
PCI hotplug for bridges was only introduced in 2.0; however,
  acpi_set_bsel()->object_property_add_uint32_ptr(bus, ACPI_PCIHP_PROP_BSEL)
did not take into account that in legacy mode (1.7), where PCI hotplug
for bridges is unavailable, the ACPI_PCIHP_PROP_BSEL property for the
only bus "PCI.0" had already been created earlier at acpi_pcihp_init() time.

We managed to live with it only because the error raised by adding
a duplicate property in acpi_set_bsel() was ignored, which
resulted in needlessly leaking the just-allocated (int)bus_bsel.

The issue affects only the 1.7 machine type: QEMU-generated ACPI tables
were introduced at that time, but PCI hotplug for bridges did not
arrive until the next release (2.0).

Fix it by removing the duplicate ACPI_PCIHP_PROP_BSEL initialization
in acpi_pcihp_init() and doing it in only one place, acpi_set_pci_info().

PS:
do not ignore the error returned by object_property_add_uint32_ptr();
abort QEMU instead, since it is a programming error that should be fixed
rather than ignored.
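
A sketch of the "PS" point (illustrative only, not the actual hunk; the
helper name and exact include set are assumed from QEMU of that era):
the BSEL property is added in one place, and a failure aborts instead of
being silently ignored and leaking bus_bsel:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "qom/object.h"
    #include "hw/acpi/pcihp.h"

    static void set_bsel_once(Object *bus_obj, unsigned *bsel_alloc)
    {
        uint32_t *bus_bsel = g_malloc(sizeof(*bus_bsel));

        *bus_bsel = (*bsel_alloc)++;
        /* &error_abort turns a duplicate-property programming error
         * into an immediate abort rather than a silent leak. */
        object_property_add_uint32_ptr(bus_obj, ACPI_PCIHP_PROP_BSEL,
                                       bus_bsel, &error_abort);
    }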

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reported-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1470211497-116801-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[ Marc-André - Remove now unused ACPI_PCIHP_LEGACY_SIZE ]
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
2607b660e9 tests: fix ipmi-bt-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
3f5f5d04cd tests: fix ipmi-kcs-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
f11dc27bcc tests: fix bios-tables-test leak
The inner array should be freed too.
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
2c8f86961b tests: fix hd-geo-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
f5aa4bdc76 tests: fix ide-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
0c0eb30260 tests: fix vhost-user-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:51:00 +04:00
Marc-André Lureau
fb6faea888 tests: fix q35-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:50:33 +04:00
Mike Nawrocki
356bb70ed1 Add PowerPC 32-bit guest memory dump support
This patch extends support for the `dump-guest-memory` command to the
32-bit PowerPC architecture. It relies on the assumption that a 64-bit
guest will not dump a 32-bit core file (and vice versa).

[dwg: I suspect this patch won't cover all cases, in particular a
32-bit machine type on a 64-bit qemu build.  However, it does strictly
more than what we had before, so might as well apply as a starting
point]

Signed-off-by: Mike Nawrocki <michael.nawrocki@gtri.gatech.edu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:53:58 +11:00
Cédric Le Goater
8e4fba203e ppc/xics: rename 'ICPState *' variables to 'icp'
'ICPState *' variables are currently named 'ss'. This is confusing, so
let's give them an appropriate name: 'icp'.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
6449da4545 ppc/xics: move InterruptStatsProvider to the sPAPR machine
It provides a better monitor output of the ICP and ICS objects;
otherwise the objects are printed out of order.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
a7ff1212e9 ppc/xics: move ics-simple post_load under the machine
The ICS object uses a post_load() handler which implicitly relies
on the fact that the internal state of the ICS and ICP objects has
been restored, but this is not guaranteed. So let's move the code
under the post_load() handler of the machine, where we know the objects
have been fully restored.

The icp_resend() handler of the XICSFabric QOM interface is also
removed as it is now obsolete.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
e6f7e110ee ppc/xics: remove the XICSState classes
The XICSState classes are not used anymore. They have now been fully
deprecated by the XICSFabric QOM interface. Do the cleanups.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
2192a9303d ppc/xics: export the XICS init routines
There is nothing left related to the XICS object in the realize
functions of the KVMXICSState and XICSState class. So adapt the
interfaces to call these routines directly from the sPAPR machine init
sequence.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
852ad27e14 ppc/xics: move the ICP array under the sPAPR machine
This is the last step to remove the XICSState abstraction and have the
machine hold all the objects related to interrupts: ICSs and ICPs.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
20147f2fce ppc/xics: register the reset handler of ICP objects
The reset of the ICP objects is currently handled by XICS but this can
be done for each individual ICP.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
b0ec31290c ppc/xics: simplify spapr_dt_xics() interface
spapr_dt_xics() only needs the number of servers to build the device
tree nodes. Let's change the routine interface to reflect that.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b4f27d71e3 ppc/xics: use the QOM interface to grab an ICP
Also introduce a xics_icp_get() helper to simplify the changes.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
f023243432 ppc/xics: move the cpu_setup() handler under the ICPState class
The cpu_setup() handler is currently under the XICSState class but it
really belongs under ICPState as it is setting up an individual vCPU.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
bf50860d1b ppc/xics: simplify the cpu_setup() handler
The cpu_setup() handler currently takes a 'XICSState *' argument to
grab the kernel ICP file descriptor. This interface can be simplified
by using the 'xics' backlink of the ICP object.

This change is also required by subsequent patches which makes use of
the QOM interface for XICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
729f8a4f48 ppc/xics: move kernel_xics_fd out of KVMXICSState
The kernel ICP file descriptor is the only reason behind the
KVMXICSState class and it's in the way of more cleanups. Let's make it
a static for the moment and move forward.

If this is a problem, we could use an attribute under the sPAPR machine
later on.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b2fc59aaf9 ppc/xics: extend the QOM interface to handle ICPs
Let's add two new handlers for ICPs. One is to get an ICP object from
a server number and a second is to resend the irqs when needed.

The icp_resend() handler is a temporary workaround needed by the
ics-simple post_load() handler. It will be removed when the post_load
portion can be done at the machine level.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
d114a66225 ppc/xics: remove the XICS list of ICS
This is not used anymore.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
c79b2fdd7b ppc/xics: register the reset handler of ICS objects
The reset of the ICS objects is currently handled by XICS but this can
be done for each individual ICS. This also reduces the use of the XICS
list of ICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
be1fe35199 ppc/xics: remove xics_find_source()
It is not used anymore now that we have the QOM interface for XICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
2cd908d0ad ppc/xics: use the QOM interface to resend irqs
Also change the ICPState 'xics' backlink to be a XICSFabric; this
removes the need to use qdev_get_machine() to get the QOM interface
in some of the routines.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
f7759e4331 ppc/xics: use the QOM interface to get irqs
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
7844e12b28 ppc/xics: use the QOM interface under the sPAPR machine
Add 'ics_get' and 'ics_resend' handlers to the sPAPR machine. These
are relatively simple for a single ICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
51b180051e ppc/xics: introduce a XICSFabric QOM interface to handle ICSs
This interface provides two simple handlers. One is to get an ICS
(Interrupt Source Controller) object from an irq number and a second
to resend the irqs when needed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b9038e7806 ppc/xics: add an InterruptStatsProvider interface to ICS and ICP objects
This is, again, to reduce the use of the list of ICS objects. Let's
make each individual ICS and ICP object an InterruptStatsProvider and
remove this same interface from XICSState.

The InterruptStatsProvider will be moved at the machine level after
the XICS cleanups are completed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
681bfaded6 ppc/xics: store the ICS object under the sPAPR machine
A list of ICS objects was introduced under the XICS object for the
PowerNV machine but, for the sPAPR machine, it brings extra complexity
as there is only a single ICS. To simplify the code, let's add the ICS
pointer under the sPAPR machine and try to reduce the use of this list
where possible.

Also, change the xics_spapr_*() routines to use an ICS object instead
of an XICSState and change their name to reflect that these are
specific to the sPAPR ICS object.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
817bb6a446 ppc/xics: remove set_nr_servers() handler from XICSStateClass
Today, the ICP (Interrupt Controller Presenter) objects are created by
the 'nr_servers' property handler of the XICS object and a class
handler. They are realized in the XICS object realize routine.

Let's simplify the process by creating the ICP objects along with the
XICS object at the machine level.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
4e4169f7a2 ppc/xics: remove set_nr_irqs() handler from XICSStateClass
Today, the ICS (Interrupt Controller Source) object is created and
realized by the init and realize routines of the XICS object, but some
of the parameters are only known at the machine level.

These parameters are passed from the sPAPR machine to the ICS object
in a rather convoluted way using property handlers and a class handler
of the XICS object. The number of irqs required to allocate the IRQ
state objects in the ICS realize routine is one of them.

Let's simplify the process by creating the ICS object along with the
XICS object at the machine level and link the ICS into the XICS list
of ICSs at this level also. In the sPAPR machine, there is only a
single ICS but that will change with the PowerNV machine.

Also, QOMify the creation of the objects and get rid of the
superfluous code.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
738d5db824 xics: XICS should not be a SysBusDevice
Currently xics - the component of the IBM POWER interrupt controller
representing the overall interrupt fabric / architecture - is
represented as a descendant of SysBusDevice.  However, this is not
really correct: the xics presents nothing in MMIO space, so it should
be an "unattached" device in the current QOM model.

Since this device will always be created by the machine type, not created
specifically from the command line, and because it has no migrated state,
it should be safe to move it around the device composition tree.

Therefore this patch changes it to a descendant of TYPE_DEVICE, and
makes it an unattached device.  So that its reset handler still gets
called correctly, we add a qdev_set_parent_bus() to attach it to
sysbus.  It's not really clear that this is correct (instead of using
register_reset()), but it appears to be a common technique.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
[clg corrected problems with reset]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
[dwg folded together and updated commit message]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Greg Kurz
a8eeafda19 spapr/pci: populate PCI DT in reverse order
Since commit 1d2d974244 "spapr_pci: enumerate and add PCI device tree", QEMU
populates the PCI device tree in the opposite order compared to SLOF.

Before 1d2d974244:

Populating /pci@800000020000000
                     00 0000 (D) : 1af4 1000    virtio [ net ]
                     00 0800 (D) : 1af4 1001    virtio [ block ]
                     00 1000 (D) : 1af4 1009    virtio [ network ]
Populating /pci@800000020000000/unknown-legacy-device@2

7e5294b8 :  /pci@800000020000000
7e52b998 :  |-- ethernet@0
7e52c0c8 :  |-- scsi@1
7e52c7e8 :  +-- unknown-legacy-device@2 ok

Since 1d2d974244:

Populating /pci@800000020000000
                     00 1000 (D) : 1af4 1009    virtio [ network ]
Populating /pci@800000020000000/unknown-legacy-device@2
                     00 0800 (D) : 1af4 1001    virtio [ block ]
                     00 0000 (D) : 1af4 1000    virtio [ net ]

7e5e8118 :  /pci@800000020000000
7e5ea6a0 :  |-- unknown-legacy-device@2
7e5eadb8 :  |-- scsi@1
7e5eb4d8 :  +-- ethernet@0 ok

This behaviour change is not actually a bug, since no assumptions should be
made about DT ordering. But it has no real justification either, other than
being a consequence of the way fdt_add_subnode() inserts new elements
at the front of the FDT rather than appending them at the tail.

This patch reverts to the historical SLOF ordering by walking PCI devices
in reverse order. This reconciles pseries with the behavior of the x86
machine types. It is expected to make things easier when porting existing
applications to power.
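
A minimal sketch of the approach (the helper name is hypothetical and
this is not the actual spapr_pci.c hunk): since fdt_add_subnode()
prepends children, creating the nodes in descending devfn order leaves
the flattened tree in ascending order, as SLOF produced:

    int devfn;

    for (devfn = PCI_SLOT_MAX * PCI_FUNC_MAX - 1; devfn >= 0; devfn--) {
        PCIDevice *dev = bus->devices[devfn];

        if (!dev) {
            continue;
        }
        /* hypothetical helper wrapping fdt_add_subnode() + properties */
        spapr_add_pci_device_node(fdt, bus_offset, dev);
    }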

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
(slight update to the changelog)
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
b63d043418 target/ppc: add mcrxrx instruction
mcrxrx: Move to CR from XER Extended

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
c44027ffb9 target/ppc: add ov32 flag in divide operations
Add helper_div_compute_ov() in the int_helper for updating the overflow
flags.

For Divide Word:
SO, OV, and OV32 bits reflect overflow of the 32-bit result

For Divide DoubleWord:
SO, OV, and OV32 bits reflect overflow of the 64-bit result

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
61aa9a697a target/ppc: add ov32 flag for multiply low insns
For Multiply Word:
SO, OV, and OV32 bits reflect overflow of the 32-bit result

For Multiply DoubleWord:
SO, OV, and OV32 bits reflect overflow of the 64-bit result

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
1480d71cbe target/ppc: use tcg ops for neg instruction
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
dc0ad84449 target/ppc: update overflow flags for add/sub
* SO and OV reflect overflow of the 64-bit result in 64-bit mode and
  overflow of the low-order 32-bit result in 32-bit mode

* OV32 reflects overflow of the low-order 32-bit result independent of the mode
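
A self-contained sketch of those rules for addition, in plain C (not the
TCG implementation; the helper names are ours):

    #include <stdint.h>
    #include <stdbool.h>

    /* Signed overflow of r = a + b at full 64-bit width (used for OV
     * in 64-bit mode): operands agree in sign but the result differs. */
    static bool add_ovf64(uint64_t a, uint64_t b, uint64_t r)
    {
        return ((a ^ r) & (b ^ r)) >> 63;
    }

    /* Signed overflow of the low-order 32-bit result (used for OV32
     * always, and for OV in 32-bit mode). */
    static bool add_ovf32(uint64_t a, uint64_t b, uint64_t r)
    {
        return (((a ^ r) & (b ^ r)) >> 31) & 1;
    }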

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
33903d0aa4 target/ppc: update ca32 in arithmetic substract
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
6b10d008a0 target/ppc: update ca32 in arithmetic add
Adds a routine to compute ca32: gen_op_arith_compute_ca32.

In 64-bit mode, the new compute-ca32 routine is used, while in 32-bit
mode CA and CA32 will have the same value.
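
For illustration, a self-contained computation of the 32-bit carry-out
(plain C, not the gen_op_arith_compute_ca32 TCG code):

    #include <stdint.h>

    /* Carry out of bit 32 of an addition with carry-in; this is what
     * CA32 reports regardless of the mode. */
    static unsigned add_ca32(uint64_t a, uint64_t b, unsigned carry_in)
    {
        uint64_t low = (a & 0xffffffffULL) + (b & 0xffffffffULL) + carry_in;

        return (unsigned)(low >> 32);   /* 0 or 1 */
    }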

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
dd09c36159 target/ppc: support for 32-bit carry and overflow
POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
and corresponding defines.

Moreover, CA32 is updated when CA is updated and OV32 is updated when OV
is updated.

Arithmetic instructions:
    * Additions and Subtractions:

        addic, addic., subfic, addc, subfc, adde, subfe, addme, subfme,
        addze, and subfze always update CA and CA32.

        => CA reflects the carry out of bit 0 in 64-bit mode and out of
           bit 32 in 32-bit mode.
        => CA32 reflects the carry out of bit 32 independent of the
           mode.

        => SO and OV reflect overflow of the 64-bit result in 64-bit
           mode and overflow of the low-order 32-bit result in 32-bit
           mode
        => OV32 reflects overflow of the low-order 32-bit independent of
           the mode

    * Multiply Low and Divide:

        For mulld, divd, divde, divdu and divdeu: SO, OV, and OV32 bits
        reflect overflow of the 64-bit result

        For mullw, divw, divwe, divwu and divweu: SO, OV, and OV32 bits
        reflect overflow of the 32-bit result

     * Negate with OE=1 (nego)

       For 64-bit mode if the register RA contains
       0x8000_0000_0000_0000, OV and OV32 are set to 1.

       For 32-bit mode if the register RA contains 0x8000_0000, OV and
       OV32 are set to 1.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
e78308fd39 target/ppc: Correct SDR1 masking
SDR_64_HTABORG, which indicates the bits of the SDR1 register to use for
the base of a 64-bit machine's hashed page table (HPT), isn't correct.  It
includes the top 46 bits of the register, but in fact the top 4 bits must
be zero (according to the ISA v2.07).  No actual implementation has
supported close to 2^60 bytes of physical address space, so it's kind of
irrelevant, but we might as well correct this.
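
For illustration, the mask values implied by that description (inferred
from the text above, not quoted from the patch):

    /* Top 46 bits of the 64-bit SDR1 register (bits 63..18). */
    #define HTABORG_TOP46_BITS   0xFFFFFFFFFFFC0000ULL
    /* The same field with the top 4 bits (63..60) forced to zero,
     * i.e. the corrected HTABORG mask described above. */
    #define HTABORG_CORRECTED    0x0FFFFFFFFFFC0000ULL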

In addition, although we checked for bad size values in SDR1, we never
reported an error if entirely invalid bits were set there.  Add this check
to ppc_store_sdr1().

Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Suraj Jitindar Singh
8d63351f9f target/ppc: Remove the function ppc_hash64_set_sdr1()
The function ppc_hash64_set_sdr1 basically checked the htabsize and set an
error if it was too big; otherwise it just stored the value in SPR_SDR1.

Given that the only function which calls ppc_hash64_set_sdr1() is
ppc_store_sdr1(), why not handle the checking in ppc_store_sdr1() to avoid
the extra function call. Note that ppc_store_sdr1() already stores the
value in SPR_SDR1 anyway, so we were doing it twice.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
[dwg: Remove unnecessary error temporary]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
e57ca75ce3 target/ppc: Manage external HPT via virtual hypervisor
The pseries machine type implements the behaviour of a PAPR compliant
hypervisor, without actually executing such a hypervisor on the virtual
CPU.  To do this we need some hooks in the CPU code to make hypervisor
facilities get redirected to the machine instead of emulated internally.

For hypercalls this is managed through the cpu->vhyp field, which points
to a QOM interface with a method implementing the hypercall.

For the hashed page table (HPT) - also a hypervisor resource - we use an
older hack.  CPUPPCState has an 'external_htab' field which when non-NULL
indicates that the HPT is stored in qemu memory, rather than within the
guest's address space.

For consistency - and to make some future extensions easier - this merges
the external HPT mechanism into the vhyp mechanism.  Methods are added
to vhyp for the basic operations the core hash MMU code needs: map_hptes()
and unmap_hptes() for reading the HPT, store_hpte() for updating it and
hpt_mask() to retrieve its size.
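
A rough sketch of the shape such an interface might take (only the method
names come from the paragraph above; the struct name, types and signatures
here are illustrative, not QEMU's actual definitions):

    #include <stdint.h>

    typedef uint64_t hwaddr_t;   /* stand-in for QEMU's hwaddr */

    typedef struct VhypHptOps {
        /* Map a run of n HPTEs starting at index ptex for reading. */
        const uint64_t *(*map_hptes)(void *vhyp, hwaddr_t ptex, int n);
        void (*unmap_hptes)(void *vhyp, const uint64_t *hptes,
                            hwaddr_t ptex, int n);
        /* Update a single HPTE (two 64-bit words). */
        void (*store_hpte)(void *vhyp, hwaddr_t ptex,
                           uint64_t pte0, uint64_t pte1);
        /* Size of the HPT expressed as an index mask. */
        hwaddr_t (*hpt_mask)(void *vhyp);
    } VhypHptOps;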

To match this, the pseries machine now sets these vhyp fields in its
existing vhyp class, rather than reaching into the cpu object to set the
external_htab field.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
36778660d7 target/ppc: Eliminate htab_base and htab_mask variables
CPUPPCState includes fields htab_base and htab_mask which store the base
address (GPA) and size (as a mask) of the guest's hashed page table (HPT).
These are set when the SDR1 register is updated.

Keeping these in sync with the SDR1 is actually a little bit fiddly, and
probably not useful for performance, since keeping them expands the size of
CPUPPCState.  It also makes some upcoming changes harder to implement.

This patch removes these fields, in favour of calculating them directly
from the SDR1 contents when necessary.

This does make a change to the behaviour of attempting to write a bad value
(invalid HPT size) to the SDR1 with an mtspr instruction.  Previously, the
bad value would be stored in SDR1 and could be retrieved with a later
mfspr, but the HPT size as used by the softmmu would be clamped to the
allowed values.  Now, writing a bad value is treated as a no-op.  An error
message is printed in both new and old versions.

I'm not sure which behaviour, if either, matches real hardware.  I don't
think it matters that much, since it's pretty clear that if an OS writes
a bad value to SDR1, it's not going to boot.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-03-01 11:23:39 +11:00
David Gibson
7222b94a83 target/ppc: Cleanup HPTE accessors for 64-bit hash MMU
Accesses to the hashed page table (HPT) are complicated by the fact that
the HPT could be in one of three places:
   1) Within guest memory - when we're emulating a full guest CPU at the
      hardware level (e.g. powernv, mac99, g3beige)
   2) Within qemu, but outside guest memory - when we're emulating user and
      supervisor instructions within TCG, but instead of emulating
      the CPU's hypervisor mode, we just emulate a hypervisor's behaviour
      (pseries in TCG or KVM-PR)
   3) Within the host kernel - a pseries machine using KVM-HV
      acceleration.  Mostly accesses to the HPT are handled by KVM,
      but there are a few cases where qemu needs to access it via a
      special fd for the purpose.

In order to batch accesses to the fd in case (3), we use a somewhat awkward
ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case
(3) reads / releases several HPTEs from the kernel as a batch (usually a
whole PTEG).  For cases (1) & (2) it just returns an address value.  The
actual HPTE load helpers then need to interpret the returned token
differently in the 3 cases.

This patch keeps the same basic structure, but simplifies the details.
First start_access() / stop_access() are renamed to map_hptes() and
unmap_hptes() to make their operation more obvious.  Second, map_hptes()
now always returns a qemu pointer, which can always be used in the same way
by the load_hpte() helpers.  In case (1) it comes from address_space_map()
in case (2) directly from qemu's HPT buffer and in case (3) from a
temporary buffer read from the KVM fd.

While we're at it, make things a bit more consistent in terms of types and
variable names: avoid variables named 'index' (it shadows index(3) which
can lead to confusing results), use 'hwaddr ptex' for HPTE indices and
uint64_t for each of the HPTE words, use ptex throughout the call stack
instead of pte_offset in some places (we still need that at the bottom
layer, but nowhere else).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
7d6250e3d1 target/ppc: SDR1 is a hypervisor resource
At present the SDR1 register - the base of the system's hashed page table
(HPT) - is represented as an SPR with supervisor read and write permission.
However, on CPUs which have a hypervisor mode, the SDR1 is a hypervisor
only resource.  Change the permission checking on the SPR to reflect this.

Now that this is done, we don't need to check for an external HPT executing
mtsdr1: an external HPT only applies when we're emulating the behaviour of
a hypervisor, rather than modelling the CPU's hypervisor mode internally,
so if we're permitted to execute mtsdr1, we don't have an external HPT.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
b7b0b1f13a target/ppc: Merge cpu_ppc_set_vhyp() with cpu_ppc_set_papr()
cpu_ppc_set_papr() sets up various aspects of CPU state for use with PAPR
paravirtualized guests.  However, it doesn't set the virtual hypervisor,
so callers must also call cpu_ppc_set_vhyp() so that PAPR hypercalls are
handled properly.  This is a bit silly, so fold setting the virtual
hypervisor into cpu_ppc_set_papr().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
c6404adebf pseries: Minor cleanups to HPT management hypercalls
* Standardize on 'ptex' instead of 'pte_index' for HPTE index variables
   for consistency and brevity
 * Avoid variables named 'index'; shadowing index(3) from libc can lead to
   surprising bugs if the variable is removed, because compiler errors
   might not appear for remaining references
 * Clarify index calculations in h_enter() - we have two cases, H_EXACT
   where the exact HPTE slot is given, and !H_EXACT where we search for
   an empty slot within the hash bucket.  Make the calculation more
   consistent between the cases.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
1ad9f0a464 target/ppc: Fix KVM-HV HPTE accessors
When a 'pseries' guest is running with KVM-HV, the guest's hashed page
table (HPT) is stored within the host kernel, so it is not directly
accessible to qemu.  Most of the time, qemu doesn't need to access it:
we're using the hardware MMU, and KVM itself implements the guest
hypercalls for manipulating the HPT.

However, qemu does need access to the in-KVM HPT to implement
get_phys_page_debug() for the benefit of the gdbstub, and maybe for
other debug operations.

To allow this, 7c43bca "target-ppc: Fix page table lookup with kvm
enabled" added kvmppc_hash64_read_pteg() to target/ppc/kvm.c to read
in a batch of HPTEs from the KVM table.  Unfortunately, there are a
couple of problems with this:

First, the name of the function implies it always reads a whole PTEG
from the HPT, but in fact in some cases it's used to grab individual
HPTEs (which ends up pulling 8 HPTEs, not aligned to a PTEG from the
kernel).

Second, and more importantly, the code to read the HPTEs from KVM is
simply wrong, in general.  The data from the fd that KVM provides is
designed mostly for compact migration rather than this sort of one-off
access, and so needs some decoding for this purpose.  The current code
will work in some cases, but if there are invalid HPTEs then it will
not get sane results.

This patch rewrites the HPTE reading function to have a simpler
interface (just read n HPTEs into a caller provided buffer), and to
correctly decode the stream from the kernel.

For consistency we also clean up the similar function for altering
HPTEs within KVM (introduced in c138593 "target-ppc: Update
ppc_hash64_store_hpte to support updating in-kernel htab").

Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Greg Kurz
6244bb7e58 sysemu: support up to 1024 vCPUs
Some systems can already provide more than 255 hardware threads.

Bumping the QEMU limit to 1024 seems reasonable:
- it has no visible overhead in top;
- the limit itself has no effect on hot paths.

Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
f32899de97 target/ppc: introduce helper_update_ov_legacy
Removes duplicate code and will be useful for consolidating flags.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Laurent Vivier
2530a1a5cf spapr: generate DT node names
When DT node names for PCI devices are generated by SLOF,
they are generated according to the type of the device
(for instance, ethernet for a virtio-net-pci device).

Node names for hotplugged devices are generated by QEMU.
This patch adds the mechanism for QEMU to create the node
name according to the device type too.

The data structure has been roughly copied from OpenBIOS/OpenHackware;
the node names come from SLOF.

Example:

Hotplugging some PCI cards with QEMU monitor:

device_add virtio-tablet-pci
device_add virtio-serial-pci
device_add virtio-mouse-pci
device_add virtio-scsi-pci
device_add virtio-gpu-pci
device_add ne2k_pci
device_add nec-usb-xhci
device_add intel-hda

What we can see in linux device tree:

for dir in /proc/device-tree/pci@800000020000000/*@*/; do
    echo $dir
    cat $dir/name
    echo
done

WITHOUT this patch:

/proc/device-tree/pci@800000020000000/pci@0/
pci
/proc/device-tree/pci@800000020000000/pci@1/
pci
/proc/device-tree/pci@800000020000000/pci@2/
pci
/proc/device-tree/pci@800000020000000/pci@3/
pci
/proc/device-tree/pci@800000020000000/pci@4/
pci
/proc/device-tree/pci@800000020000000/pci@5/
pci
/proc/device-tree/pci@800000020000000/pci@6/
pci
/proc/device-tree/pci@800000020000000/pci@7/
pci

WITH this patch:

/proc/device-tree/pci@800000020000000/communication-controller@1/
communication-controller
/proc/device-tree/pci@800000020000000/display@4/
display
/proc/device-tree/pci@800000020000000/ethernet@5/
ethernet
/proc/device-tree/pci@800000020000000/input-controller@0/
input-controller
/proc/device-tree/pci@800000020000000/mouse@2/
mouse
/proc/device-tree/pci@800000020000000/multimedia-device@7/
multimedia-device
/proc/device-tree/pci@800000020000000/scsi@3/
scsi
/proc/device-tree/pci@800000020000000/usb-xhci@6/
usb-xhci

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
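As a rough illustration of the approach described above, the idea is a simple
lookup from PCI class ID to the node name SLOF would have generated, falling
back to the generic "pci" name.  The sketch below is illustrative only: the
entries and helper names are assumptions, not the exact table this patch adds.

    /* Sketch only: map a PCI class ID to a device tree node name. */
    typedef struct {
        uint16_t class_id;
        const char *name;
    } PciNodeName;

    static const PciNodeName pci_node_names[] = {
        { 0x0100, "scsi" },      /* PCI_CLASS_STORAGE_SCSI */
        { 0x0200, "ethernet" },  /* PCI_CLASS_NETWORK_ETHERNET */
        { 0x0380, "display" },   /* PCI_CLASS_DISPLAY_OTHER */
    };

    static const char *pci_node_name(uint16_t class_id)
    {
        for (size_t i = 0;
             i < sizeof(pci_node_names) / sizeof(pci_node_names[0]); i++) {
            if (pci_node_names[i].class_id == class_id) {
                return pci_node_names[i].name;
            }
        }
        return "pci"; /* generic fallback, as in the "WITHOUT" output above */
    }
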
Laurent Vivier
089f7e827d PCI: add missing classes in pci_ids.h to build device tree
To allow QEMU to add PCI entries to the device tree,
we need a more exhaustive list of PCI class IDs.

This patch synchronizes as much as possible with
pci_ids.h and adds some missing IDs from SLOF.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Nikunj A Dadhania
1bd33d0d7c target/ppc: optimize gen_write_xer()
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Nikunj A Dadhania
00b7078831 target/ppc: move cpu_[read, write]_xer to cpu.c
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Mark Cave-Ayland
9b40d1ee13 Update OpenBIOS images to 0cd97cc built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-02-28 21:50:03 +00:00
Pranith Kumar
dc1eccd661 aarch64: Change ext type to TCGType to fix warnings
To fix the following warnings:

In file included from /users/pranith/qemu/tcg/tcg.c:255:
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:879:24: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_cmp(s, ext, a, b, b_const);
        ~~~~~~~~~~~    ^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:893:36: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from macro 'tcg_out_insn'
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
                                                                ^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:895:37: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from macro 'tcg_out_insn'
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
                                                                ^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:1610:27: warning: implicit conversion from enumeration type 'TCGType' (aka 'enum TCGType') to different enumeration type 'TCGMemOp' (aka 'enum TCGMemOp')
      [-Wenum-conversion]
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        ~~~~~~~~~~~~~~    ^~~

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170217154311.13920-1-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-03-01 08:28:06 +11:00
Marc-André Lureau
f3f8e81150 tests: fix endianness-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
072bdb07c5 tests: fix ptimer leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
461a862022 glib-compat: add g_test_add_data_func_full fallback
Move the fallback from qtest_add_data_func_full() to glib-compat.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
e703dcbaee timer: use an inline function for free
Similarly to allocation, do it from an inline function. This allows
tests to use only the headers for allocating and freeing timers.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 00:09:28 +04:00
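A minimal sketch of the pattern, assuming (as the message implies) that freeing
a timer is just a g_free() of the QEMUTimer, mirroring the inline allocation
side:

    /* Sketch: freeing moves into the header as a static inline, so tests can
     * allocate and free timers using only the headers. */
    static inline void timer_free(QEMUTimer *ts)
    {
        g_free(ts);
    }
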
Marc-André Lureau
14324f585d tests: fix leaks in test-io-channel-command
No need for strdup; also fix leaks when socat is missing.

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: "Daniel P. Berrange" <berrange@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
dc491fead0 tests: fix qmp response leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
fc34059f08 qtest: fix a memory leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 00:09:28 +04:00
Peter Maydell
758af5e862 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20170228' into staging
Network boot for s390x. More information (and instructions
for building an s390-netboot.img) can be found at
http://wiki.qemu-project.org/Features/S390xNetworkBoot

# gpg: Signature made Tue 28 Feb 2017 11:27:18 GMT
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20170228:
  pc-bios/s390-ccw.img: rebuild image
  pc-bios/s390-ccw: Use the ccw bios to start the network boot
  s390x/ipl: Load network boot image
  s390x/ipl: Extend S390IPLState to support network boot
  elf-loader: Allow late loading of elf

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 19:52:26 +00:00
Kevin Wolf
b2c2832c61 block: Add Error parameter to bdrv_append()
Aborting on error in bdrv_append() isn't correct. This patch fixes it
and lets the callers handle failures.

Test case 085 needs a reference output update. This is caused by the
reversed order of bdrv_set_backing_hd() and change_parent_backing_link()
in bdrv_append(): When the backing file of the new node is set, the
parent nodes are still pointing to the old top, so the backing blocker
is now initialised with the node name rather than the BlockBackend name.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:51 +01:00
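A sketch of the caller-side pattern this change enables; the error handling
shown is the usual QEMU Error idiom, and 'bs_new', 'bs_top' and 'errp' are
placeholders rather than a specific call site:

    Error *local_err = NULL;

    bdrv_append(bs_new, bs_top, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);   /* hand the failure upwards */
        return;
    }
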
Kevin Wolf
12fa4af61f block: Add Error parameter to bdrv_set_backing_hd()
Not all callers of bdrv_set_backing_hd() know for sure that attaching
the backing file will be allowed by the permission system. Return the
error from the function rather than aborting.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:51 +01:00
Kevin Wolf
c8f6d58edb block: Assertions for resize permission
This adds an assertion that ensures that the necessary resize permission
has been granted before bdrv_truncate() is called.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
afa4b29323 block: Assertions for write permissions
This adds assertions that ensure that the necessary write permissions
have been granted before someone attempts to write to a node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
85c97ca7a1 block: Pass BdrvChild to bdrv_aligned_preadv/pwritev and copy-on-read
This is where we want to check the permissions, so we need to have the
BdrvChild around where they are stored.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
2807c0cd43 tests: Remove FIXME comments
Not requesting any permissions is actually correct for these test cases
because no actual I/O or other operation covered by the permission
system is performed.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
8a7ce4f933 nbd/server: Use real permissions for NBD exports
NBD can't cope with device size changes, so resize must be forbidden,
but otherwise we can tolerate anything. Depending on whether the export
is writable or not, we only require consistent reads and writes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
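A sketch of the permission choice described above, assuming the BLK_PERM_*
constants from this series; 'writable' and 'blk' are placeholders:

    /* Always need consistent reads; need writes only for writable exports.
     * Share everything except resize, which NBD cannot cope with. */
    uint64_t perm = BLK_PERM_CONSISTENT_READ;

    if (writable) {
        perm |= BLK_PERM_WRITE;
    }
    blk = blk_new(perm, BLK_PERM_ALL & ~BLK_PERM_RESIZE);
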
Kevin Wolf
6f5ef23a3f migration/block: Use real permissions
Request BLK_PERM_CONSISTENT_READ for the source of block migration, and
handle potential permission errors as well as we can in this place
(which is not very good, but it matches the other failure cases).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
887354bd13 hmp: Request permissions in qemu-io
The HMP command 'qemu-io' is a bit tricky because it wants to work on
the original BlockBackend, but additional permissions could be required.
The details are explained in a comment in the code, but in summary, just
request whatever permissions the current qemu-io command needs.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
0db832f42e commit: Add filter-node-name to block-commit
Management tools need to be able to know about every node in the graph
and need a way to address them. Changing the graph structure was okay
because libvirt doesn't really manage the node level yet, but future
libvirt versions need to deal with both new and old versions of qemu.

This new option to block-commit allows the client to set a node-name
for the automatically inserted filter driver, and at the same time
serves as a witness for a future libvirt that this version of qemu does
automatically insert a filter driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
6cdbceb12c mirror: Add filter-node-name to blockdev-mirror
Management tools need to be able to know about every node in the graph
and need a way to address them. Changing the graph structure was okay
because libvirt doesn't really manage the node level yet, but future
libvirt versions need to deal with both new and old versions of qemu.

This new option to blockdev-mirror allows the client to set a node-name
for the automatically inserted filter driver, and at the same time
serves as a witness for a future libvirt that this version of qemu does
automatically insert a filter driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
a170a91fd3 stream: Use real permissions in streaming block job
The correct permissions are relatively obvious here (and explained in
code comments). For intermediate streaming, we need to reopen the top
node read-write before creating the job now because the permissions
system catches attempts to get the BLK_PERM_WRITE_UNCHANGED permission
on a read-only node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
4ef85a9c23 mirror: Use real permissions in mirror/active commit block job
The mirror block job is mainly used for two different scenarios:
Mirroring to an otherwise unused, independent target node, or for active
commit where the target node is part of the backing chain of the source.

Similarly to the commit block job patch, we need to insert a new filter
node to keep the permissions correct during active commit.

Note that one change this implies is that job->blk points to
mirror_top_bs as its root now, and mirror_top_bs (rather than the actual
source node) contains the bs->job pointer. This requires qemu-img commit
to get the job by name now rather than just taking bs->job.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:46 +01:00
Kevin Wolf
bbc02b90bc blockjob: Factor out block_job_remove_all_bdrv()
In some cases, we want to remove op blockers on intermediate nodes
before the whole block job transaction has completed (because they block
restoring the final graph state during completion). Provide a function
for this.

The whole block job lifecycle is a bit messed up and it's hard to
actually do all things in the right order, but I'll leave simplifying
this for another day.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
3e44c8e08a block: Allow backing file links in change_parent_backing_link()
Now that the backing file child role implements .attach/.detach
callbacks, nothing prevents us from modifying the graph even if that
involves changing backing file links.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
db95dbba3b block: BdrvChildRole.attach/detach() callbacks
Backing files are somewhat special compared to other kinds of children
because they are attached and detached using bdrv_set_backing_hd()
rather than the normal set of functions, which does a few more things
like setting backing blockers, toggling the BDRV_O_NO_BACKING flag,
setting parent_bs->backing_file, etc.

These special features are a reason why change_parent_backing_link()
can't handle backing files yet. With abstracting the additional features
into .attach/.detach callbacks, we get a step closer to a function that
can actually deal with this.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
dd65a52e4a block: Fix pending requests check in bdrv_append()
bdrv_append() cares about isolation of the node that it modifies, but
not about activity in some subtree below it. Instead of using the
recursive bdrv_requests_pending(), directly check bs->in_flight, which
considers only the node in question.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
4e9e4323d5 backup: Use real permissions in backup block job
The backup block job doesn't have very complicated requirements: It
needs to read from the source and write to the target, but it's fine
with either side being changed. The only restriction is that we can't
resize the image because the job uses a cached value.

qemu-iotests 055 needs to be changed because it used a target which was
already attached to a virtio-blk device. The permission system correctly
forbids this (virtio-blk can't accept another writer with its default
share-rw=off).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
d3f0675922 commit: Use real permissions for HMP 'commit'
This is a little simpler than the commit block job because it's
synchronous and only commits into the immediate backing file, but
otherwise it does more or less the same.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
8dfba27977 commit: Use real permissions in commit block job
This is probably one of the most interesting conversions to the new
op blocker system because a commit block job intentionally leaves some
intermediate block nodes in the backing chain that aren't valid on their
own any more; only the whole chain together results in a valid view.

In order to provide the 'consistent read' permission to the parents of
the 'top' node of the commit job, a new filter block driver is inserted
above 'top' which doesn't require 'consistent read' on its backing
chain. Subsequently, the commit job can block 'consistent read' on all
intermediate nodes without causing a conflict.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
76d554e20b blockjob: Add permissions to block_job_add_bdrv()
Block jobs don't actually do I/O through the reference they create
with block_job_add_bdrv(), but they might want to use the permission
system to express what the block job does to intermediate nodes. This
adds permissions to block_job_add_bdrv() to provide the means to request
permissions.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
26de9438c1 block: Add BdrvChildRole.stay_at_node
When the parents' child links are updated in bdrv_append() or
bdrv_replace_in_backing_chain(), this should affect all child links of
BlockBackends or other nodes, but not child links held for other
purposes (like for setting permissions). This patch allows the
behaviour to be controlled per BdrvChildRole.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
d083319fe0 block: Include details on permission errors in message
Instead of just saying that there was some conflict, we can be specific
and tell which permissions were in conflict and in which direction the
conflict goes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
b541155587 block: Add BdrvChildRole.get_parent_desc()
For meaningful error messages in the permission system, we need to get
some human-readable description of the parent of a BdrvChild.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
c6cc12bfa7 blockjob: Add permissions to block_job_create()
This function creates a BlockBackend internally, so the block jobs need
to tell it what they want to do with the BB.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
dabd18f64c hw/block: Introduce share-rw qdev property
By default, don't allow another writer for block devices that are
attached to a guest device. For the cases where this setup is intended
(e.g. using a cluster filesystem on the disk), the new option can be
used to allow it.

This change affects only devices using DEFINE_BLOCK_PROPERTIES().
Devices directly using DEFINE_PROP_DRIVE() still accept writers
unconditionally.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
a17c17a274 hw/block: Request permissions
This makes all device emulations with a qdev drive property request
permissions on their BlockBackend. The only thing we block at this point
is resizing images for some devices that can't support it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
39829a01ae block: Allow error return in BlockDevOps.change_media_cb()
Some devices allow a media change between read-only and read-write
media. They need to adapt the permissions in their .change_media_cb()
implementation, which can fail. So add an Error parameter to the
function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
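A sketch of the resulting callback shape; the exact BlockDevOps prototype may
differ slightly, so treat this as illustrative:

    /* change_media_cb() can now fail, e.g. when switching between read-only
     * and read-write media requires permissions that cannot be granted. */
    void (*change_media_cb)(void *opaque, bool load, Error **errp);
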
Kevin Wolf
c62d32f503 block: Request real permissions in blk_new_open()
We can figure out the necessary permissions from the flags that the
caller passed.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
55880601d8 block: Add BDRV_O_RESIZE for blk_new_open()
blk_new_open() is a convenience function that processes flags rather
than QDict options as a simple way to just open an image file.

In order to keep it convenient in the future, it must automatically
request the necessary permissions. This can easily be inferred from the
flags for read and write, but we need another flag that tells us whether
to get the resize permission.

We can't just always request it because that means that no block jobs
can run on the resulting BlockBackend (which is something that e.g.
qemu-img commit wants to do), but we can't omit it unconditionally either because
most of the .bdrv_create() implementations call blk_truncate().

The solution is to introduce another flag that is passed by all users
that want to resize the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d7086422b1 block: Add error parameter to blk_insert_bs()
Now that blk_insert_bs() requests the BlockBackend permissions for the
node it attaches to, it can fail. Instead of aborting, pass the errors
to the callers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6d0eb64d5c block: Add permissions to blk_new()
We want every user to be specific about the permissions it needs, so
we'll pass the initial permissions as parameters to blk_new(). A user
only needs to call blk_set_perm() if it wants to change the permissions
after the fact.

The permissions are stored in the BlockBackend and applied whenever a
BlockDriverState should be attached in blk_insert_bs().

This does not include actually choosing the right set of permissions
everywhere yet. Instead, the usual FIXME comment is added to each place
and will be addressed in individual patches.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
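A sketch of how a user of the new interface looks, under the assumption that
blk_new() now takes the requested and shared permission masks and that
blk_set_perm() adjusts them later; 'blk' and 'errp' are placeholders:

    /* Request consistent reads and writes; allow other users to do anything. */
    blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, BLK_PERM_ALL);

    /* Later, if the requirements change (may fail if another user conflicts): */
    if (blk_set_perm(blk, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL, errp) < 0) {
        /* handle the conflict */
    }
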
Kevin Wolf
981776b348 block: Add permissions to BlockBackend
The BlockBackend can now store the permissions that its user requires.
This is necessary because nodes can be ejected from or inserted into a
BlockBackend and all of these operations must make sure that the user
still gets what it requested initially.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
f68c598be6 block: Request real permissions in bdrv_attach_child()
Now that all block drivers with children tell us what permissions they
need from each of their children, bdrv_attach_child() can use this
information and make the right requirements while trying to attach new
children.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
78e421c9fb block: Require .bdrv_child_perm() with child nodes
All block drivers that can have child nodes implement .bdrv_child_perm()
now. Make this officially a requirement by asserting that only drivers
without children can omit .bdrv_child_perm().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
91ef38257a vvfat: Implement .bdrv_child_perm()
vvfat is the last remaining driver that can have children, but doesn't
implement .bdrv_child_perm() yet. The default handlers aren't suitable
here, so let's implement a very simple driver-specific one that protects
the internal child from being used by other users as well as our
permissions permit.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
862f215fab block: Request child permissions in format drivers
This makes use of the .bdrv_child_perm() implementation for formats that
we just added. All format drivers expose the permissions they actually
need now, so that they can be set accordingly and updated when parents
are attached or detached.

The only format not included here is raw, which was already converted
with the other filter drivers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6b1a044afb block: Default .bdrv_child_perm() for format drivers
Almost all format drivers have the same characteristics as far as
permissions are concerned: They have one or more children for storing
their own data and, more importantly, metadata (can be written to and
grow even without external write requests, must be protected against
other writers and present consistent data) and optionally a backing file
(this is just data, so like for a filter, it only depends on what the
parent nodes need).

This provides a default implementation that can be shared by most of
our format drivers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d7010dfb68 block: Request child permissions in filter drivers
All callers will have to request permissions for all of their child
nodes. Block drivers that act as simple filters can use the default
implementation of .bdrv_child_perm().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6a1b9ee152 block: Default .bdrv_child_perm() for filter drivers
Most filters need permissions related to read and write for their
children, but only if the node has a parent that wants to use the same
operation on the filter. The same is true for resize.

This adds a default implementation that simply forwards all necessary
permissions to all children of the node and leaves the other permissions
unchanged.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
33a610c398 block: Involve block drivers in permission granting
In many cases, the required permissions of one node on its children
depend on what its parents require from it. For example, the raw format
or most filter drivers only need to request consistent reads if that's
something that one of their parents wants.

In order to achieve this, this patch introduces two new BlockDriver
callbacks. The first one lets drivers first check (recursively) whether
the requested permissions can be set; the second one actually sets the
new permission bitmask.

Also add helper functions that drivers can use in their implementation
of the callbacks to update their permissions on a specific child.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d5e6f437c5 block: Let callers request permissions when attaching a child node
When attaching a node as a child to a new parent, the required and
shared permissions for this parent are checked against all other parents
of the node now, and an error is returned if there is a conflict.

This allows error returns to a function that previously always
succeeded, and the same is true for quite a few callers and their
callers. Converting all of them within the same patch would be too much,
so for now everyone tells that they don't need any permissions and allow
everyone else to do anything. This way we can use &error_abort initially
and convert caller by caller to pass actual permission requirements and
implement error handling.

All these places are marked with FIXME comments and it will be the job
of the next patches to clean them up again.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
8b2ff5291f block: Add Error argument to bdrv_attach_child()
It will have to return an error soon, so prepare the callers for it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:35 +01:00
Kevin Wolf
7006c9a761 block: Add op blocker permission constants
This patch defines the permission categories that will be used by the
new op blocker system.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:35 +01:00
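A sketch of the shape of these definitions.  Only BLK_PERM_CONSISTENT_READ,
BLK_PERM_WRITE, BLK_PERM_WRITE_UNCHANGED and BLK_PERM_RESIZE are named
elsewhere in this series; the bit values and the BLK_PERM_ALL convenience mask
below are illustrative assumptions:

    enum {
        BLK_PERM_CONSISTENT_READ = 0x01, /* reads return consistent guest data */
        BLK_PERM_WRITE           = 0x02, /* writes that change visible data */
        BLK_PERM_WRITE_UNCHANGED = 0x04, /* writes that keep the data unchanged */
        BLK_PERM_RESIZE          = 0x08, /* growing or truncating the node */
    };

    #define BLK_PERM_ALL 0x0f            /* OR of the permissions above */
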
Markus Armbruster
9e19ad4e49 option: Tweak invalid size error message and unbreak iotest 049
Commit 75cdcd1 neglected to update tests/qemu-iotests/049.out, and
made the error message for negative size worse.  Fix that.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-28 20:40:31 +01:00
Peter Lieven
2d9187bc65 qemu-img: make convert async
The convert process is currently implemented entirely with synchronous
operations: it reads one buffer and then writes it, with no parallelism, and
each request blocks until it has completed.

This can be a big performance hit when the convert process reads from and
writes to devices which do not benefit from kernel readahead or pagecache.
In our environment, the following two use cases are the most common ones
when using qemu-img convert:

a) reading from NFS and writing to iSCSI for deploying templates
b) reading from iSCSI and writing to NFS for backups

In both processes we use libiscsi and libnfs so we have no kernel cache.

This patch changes the convert process to use coroutines running in parallel,
which can significantly improve performance for network storage devices:

qemu-img (master)
 nfs -> iscsi 22.8 secs
 nfs -> ram   11.7 secs
 ram -> iscsi 12.3 secs

qemu-img-async (8 coroutines, in-order write disabled)
 nfs -> iscsi 11.0 secs
 nfs -> ram   10.4 secs
 ram -> iscsi  9.0 secs

This patch introduces two new command-line parameters: the -m parameter to specify
the number of coroutines running in parallel (defaults to 8), and the -W parameter to
allow qemu-img to write to the target out of order rather than sequentially. The
latter improves performance as the writes do not have to wait for each other to complete.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-28 20:40:31 +01:00
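A usage example with the new parameters; the image names and output format are
placeholders:

    qemu-img convert -m 16 -W -O qcow2 source.img destination.qcow2

Here -m 16 runs 16 coroutines in parallel and -W lets writes to the target
complete out of order.
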
Zhang Chen
daa33c5215 Add a new qmp command to do checkpoint, query xen replication status
We can call this QMP command to trigger a checkpoint from outside of QEMU.
Xen COLO will need this function.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wencongyang@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-02-28 11:02:12 -08:00
Zhang Chen
2c9639ecab Add a new qmp command to start/stop replication
We can call this QMP command to start/stop replication from outside of QEMU.
Xen COLO, for example, needs this function.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wencongyang@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-02-28 11:01:56 -08:00
Clement Deschamps
1eeb5c7dea bcm2835: add sdhost and gpio controllers
This adds the bcm2835_sdhost and bcm2835_gpio to the BCM2835 platform.

To support the SD controller selection (alternate function of GPIOs
48-53), the bcm2835_gpio now exposes an sdbus.
It also has a link to the sdbus of both the sdhci and sdhost controllers,
and the card is reparented from one bus to the other when the alternate
function of GPIOs 48-53 is modified.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-5-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-5-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Clement Deschamps
d72fc9dcb1 bcm2835_gpio: add bcm2835 gpio controller
This adds the BCM2835 GPIO controller.

It currently implements:
- The 54 GPIOs as outputs (qemu_irq)
- The SD controller selection via alternate function of GPIOs 48-53

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-4-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-4-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Clement Deschamps
97fb87cc5d hw/sd: add card-reparenting function
Provide a new function sdbus_reparent_card() in sd core for reparenting
a card from a SDBus to another one.

This function is required by the raspi platform, where the two SD
controllers can be dynamically switched.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-3-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-3-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: added a doc comment to the header file; changed to
 use new behaviour of qdev_set_parent_bus()]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
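A sketch of the new interface as described above; the parameter names are
assumptions:

    /* Move the card currently plugged into 'from' onto the bus 'to'. */
    void sdbus_reparent_card(SDBus *from, SDBus *to);
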
Peter Maydell
91c968ac72 qdev: Have qdev_set_parent_bus() handle devices already on a bus
Instead of qdev_set_parent_bus() silently doing the wrong
thing if it's handed a device that's already on a bus,
have it remove the device from the old bus and add it to
the new one. This is useful for the raspi2 sdcard.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 1488293711-14195-2-git-send-email-peter.maydell@linaro.org
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
07a5628cb8 hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers
Reset the CPU interface registers of the GICv3 when the CPU is reset.
For this, an ARMCPRegInfo struct is registered with one ICC
register whose resetfn is called when the CPU is reset.

All the ICC registers are reset in one single register
reset function instead of calling a resetfn for each ICC
register.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-6-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
d3a3e52962 target-arm: Add GICv3CPUState in CPUARMState struct
Add a gicv3state void pointer to the CPUARMState struct
to store the GICv3CPUState.

For use cases like CPU reset, we need to reset the
GICv3CPUState of the CPU. In such scenarios, this pointer
comes in handy.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-5-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
367b9f527b hw/intc/arm_gicv3_kvm: Implement get/put functions
This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Message-id: 1487850673-26455-4-git-send-email-vijay.kilari@gmail.com
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * Add ICC_SRE_EL1 save and restore
 * Dropped translate_fn mechanism and coded functions to handle
   save and restore of edge_trigger and priority
 * Number of APnR register saved/restored based on number of
   priority bits supported]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
6692aac411 hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
To save and restore the ICC_SRE_EL1 register, introduce a vmstate
subsection and load it only if non-zero.
Also initialize icc_sre_el1 to 0x7 in the pre_load
function.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-3-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Paolo Bonzini
3a5eb5b4a9 update Linux headers to 4.11
virtio_mmio.h would be deleted by the update; I am leaving it in, though
it was a mistake to add it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:18:49 +00:00
Paolo Bonzini
f717e6245f update-linux-headers: update for 4.11
The linux-headers/asm-arm/unistd.h file has been split into three
sub-files; copy them along.  However, building them requires
setting ARCH rather than SRCARCH.

SRCARCH defaults to $(ARCH) anyway; to avoid future occurrence of
the same problem use ARCH for all architectures where SRCARCH=ARCH.
Currently these are all except x86, sparc, sh and tile.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170221122920.16245-2-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:18:49 +00:00
Peter Maydell
8a85e0654e stm32f205: Rename 'nvic' local to 'armv7m'
The local variable 'nvic' in stm32f205_soc_realize() no longer
holds a direct pointer to the NVIC device; it is a pointer to
the ARMv7M container object. Rename it 'armv7m' accordingly.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-12-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
b72e2f6856 stm32f205: Create armv7m object without using armv7m_init()
Switch the stm32f205 SoC to create the armv7m object directly
rather than via the armv7m_init() wrapper. This fits better
with the SoC model's very QOMified design.

In particular this means we can push loading the guest image
out to the top level board code where it belongs, rather
than the SoC object having a QOM property for the filename
to load.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-11-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
ff68dacbc7 armv7m: Split systick out from NVIC
The SysTick timer isn't really part of the NVIC proper;
we just modelled it that way back when we couldn't
easily have devices that only occupied a small chunk
of a memory region. Split it out into its own device.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1487604965-23220-10-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 16:18:49 +00:00
Peter Maydell
743eb70560 armv7m: Don't put core v7M devices under CONFIG_STELLARIS
The NVIC is a core v7M device that exists for all v7M CPUs;
put it under a CONFIG_ARM_V7M rather than hiding it under
CONFIG_STELLARIS.

(We'll use CONFIG_ARM_V7M for the SysTick device too
when we split it out of the NVIC.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-9-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
f68d881c9b armv7m: Make bitband device take the address space to access
Instead of the bitband device doing a cpu_physical_memory_read/write,
make it take a MemoryRegion which specifies where it should be
accessing, and use address_space_read/write to access the
corresponding AddressSpace.

Since this entails pretty much a rewrite, convert away from
old_mmio in the process.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-8-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
98957a94ef armv7m: Make NVIC expose a memory region rather than mapping itself
Make the NVIC device expose a memory region for its users
to map, rather than mapping itself into the system memory
space on realize, and get the one user (the ARMv7M object)
to do this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-7-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
618119c2d3 armv7m: Make ARMv7M object take memory region link
Make the ARMv7M object take a memory region link which it uses
to wire up the bitband rather than having them always put
themselves in the system address space.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-6-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
21e0c38fe2 armv7m: Use QOMified armv7m object in armv7m_init()
Make the legacy armv7m_init() function use the newly QOMified
armv7m object rather than doing everything by hand.

We can return the armv7m object rather than the NVIC from
armv7m_init() because its interface to the rest of the
board (GPIOs, etc) is identical.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-5-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
56b7c66f49 armv7m: QOMify the armv7m container
Create a proper QOM object for the armv7m container, which
holds the CPU, the NVIC and the bitband regions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-4-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
6bf436cf9d armv7m: Move NVICState struct definition into header
Move the NVICState struct definition into a header, so we can
embed it into other QOM objects like SoCs.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-3-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
3651c28569 armv7m: Abstract out the "load kernel" code
Abstract the "load kernel" code out of armv7m_init() into its own
function.  This includes the registration of the CPU reset function,
to parallel how we handle this for A profile cores.

We make the function public so that boards which choose to
directly instantiate an ARMv7M device object can call it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-2-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Alex Bennée
1ed9251515 .shippable: add s390x-cross target
Use the new debian-s390x-cross.docker target to cross-compile for
s390.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20170227143028.16428-3-alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:31:01 +08:00
Alex Bennée
267004d991 new: dockerfiles/debian-s390-cross
This adds an s390 cross-build target to our library of docker setups.
There is an issue with the xfslibs-dev:s390x package having a clash, so
we do a || apt-get -f install to fix up the rest of the dependencies.

This doesn't build on the debian.docker file as we are using the
multilib compiler which is only available in stretch (the current
testing repo).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
CC: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20170227143028.16428-2-alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:31:01 +08:00
Dr. David Alan Gilbert
665414ad06 postcopy: Add extra check for COPY function
As an extra sanity check, make sure the region we're registering
can perform UFFDIO_COPY;  the COPY will fail later but this
gives a cleaner failure.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-17-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
0c1f4036db postcopy: Add doc about hugepages and postcopy
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-16-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
7e8cafb713 postcopy: Check for userfault+hugepage feature
We need extra Linux kernel support (~4.11) to support userfaults
on hugetlbfs; check for them.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-15-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
61a502128b postcopy: Update userfaultfd.h header
Just the userfaultfd.h update from Paolo's header
update run;

* Drop this patch after Paolo's update goes in *

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170224182844.32452-14-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
433bd0223c postcopy: Allow hugepages
Allow huge pages in postcopy.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-13-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
4c011c37ec postcopy: Send whole huge pages
The RAM save code uses ram_save_host_page to send whole
host pages at a time;  change this to use the host page size associated
with the RAMBlock, which may be a huge page.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-12-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
332847f075 postcopy: Mask fault addresses to huge page boundary
Currently the fault address received by userfault is rounded to
the host page boundary and a host page is requested from the source.
Use the current RAMBlock page size instead of the general host page
size so that for RAMBlocks backed by huge pages we request the whole
huge page.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-11-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
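A sketch of the address rounding this implies, assuming the per-RAMBlock page
size is a power of two; 'rb' and 'fault_addr' are placeholders, and
qemu_ram_pagesize() is assumed here as the per-RAMBlock page size accessor:

    uint64_t pagesize = qemu_ram_pagesize(rb);        /* RAMBlock's page size */
    uint64_t aligned  = fault_addr & ~(pagesize - 1);
    /* 'aligned' is what gets requested from the source, covering the whole
     * (possibly huge) page containing the fault. */
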
Dr. David Alan Gilbert
28abd20014 postcopy: Load huge pages in one go
The existing postcopy RAM load loop already ensures that it
glues together whole host pages from the target-page-sized chunks sent
over the wire.  Modify the definition of 'host page' that it uses
to be the RAMBlock page size, and thus huge pages where appropriate.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-10-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
41d84210d4 postcopy: Use temporary for placing zero huge pages
The kernel can't do UFFDIO_ZEROPAGE for huge pages, so we have
to allocate a temporary (always zero) page and use UFFDIO_COPYPAGE
on it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-9-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
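A sketch of the workaround, using userfaultfd's copy ioctl (UFFDIO_COPY in the
uapi headers) with a pre-zeroed, page-sized temporary buffer; 'zero_page',
'pagesize', 'host_addr' and 'uffd' are placeholders:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/userfaultfd.h>

    struct uffdio_copy copy = {
        .dst  = (uint64_t)(uintptr_t)host_addr,   /* where the fault occurred */
        .src  = (uint64_t)(uintptr_t)zero_page,   /* temporary all-zero page */
        .len  = pagesize,
        .mode = 0,
    };

    if (ioctl(uffd, UFFDIO_COPY, &copy)) {
        /* report the error; the huge page could not be placed */
    }
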
Dr. David Alan Gilbert
df9ff5e1e3 postcopy: Plumb pagesize down into place helpers
Now that we deal with both normal-size pages and huge pages, we need
to tell the place handlers the size we're dealing with
and make sure the temporary page is large enough.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-8-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
67f11b5c23 postcopy: Record largest page size
Record the largest page size in use; we'll need it soon for allocating
temporary buffers.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-7-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
e2fa71f527 postcopy: enhance ram_block_discard_range for hugepages
Unfortunately madvise(DONTNEED) doesn't work on hugetlbfs,
so use fallocate(FALLOC_FL_PUNCH_HOLE) instead.
qemu_fd_getpagesize only sets the page size based on a file
if the file is from hugetlbfs.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-6-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
d3a5038c46 exec: ram_block_discard_range
Create ram_block_discard_range in exec.c to replace
postcopy_ram_discard_range and most of ram_discard_range.

Those two routines are a bit of a weird combination, and
ram_discard_range is about to get more complex for hugepages.
It's OS-dependent code (so shouldn't be in migration/ram.c) but
it needs quite a bit of the innards of RAMBlock, so it doesn't belong in
os*.c either.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-5-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
29c5917201 postcopy: Chunk discards for hugepages
At the start of the postcopy phase, partially sent huge pages
must be discarded.  The code for dealing with host page sizes larger
than the target page size can be reused for this case.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-4-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
ef08fb389f postcopy: Transmit and compare individual page sizes
When using postcopy with hugepages, we require the source
and destination page sizes for any RAMBlock to match; note
that different RAMBlocks in the same VM can have different
page sizes.

Transmit them as part of the RAM information header and
fail if there's a difference.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170224182844.32452-3-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
e8ca1db29b postcopy: Transmit ram size summary word
Replace the host page size in the 'advise' command with a pagesize
summary bitmap; if the VM is just using normal RAM then
this will be exactly the same as before, but if it is using
huge pages the values will differ, and thus:
   a) Migration from/to old qemu's that don't understand huge pages
      will fail early.
   b) Migrations with different-sized RAMBlocks will also fail early.

This catches it very early; earlier than the detailed per-block
check in the next patch.
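
A sketch of how such a summary word can be built (struct and function names
here are made up for illustration; since page sizes are powers of two, OR-ing
them yields a compact bitmap that is identical to the plain page size when
only one size is in use):

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative RAM block descriptor; QEMU's RAMBlock is richer. */
    struct ram_block {
        size_t page_size;
        struct ram_block *next;
    };

    /* OR the page sizes of all RAM blocks together.  The result is a bitmap
     * of the sizes in use, and it degenerates to the old single page-size
     * value when the VM only uses normal RAM. */
    static uint64_t ram_pagesize_summary(const struct ram_block *blocks)
    {
        uint64_t summary = 0;
        const struct ram_block *b;

        for (b = blocks; b != NULL; b = b->next) {
            summary |= b->page_size;
        }
        return summary;
    }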

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170224182844.32452-2-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Vladimir Sementsov-Ogievskiy
f9c8caa04f migration: fix use-after-free of to_dst_file
hmp_savevm calls qemu_savevm_state(f), which sets to_dst_file=f in the
global migration state. Then hmp_savevm closes f (g_free is called).

The next access to to_dst_file in the migration state (for example, from
qmp_migrate_set_speed) will then use it after it has been freed.
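
The shape of the fix, as a hedged sketch with stand-in types (QEMU's
MigrationState and QEMUFile are of course richer, and the real code uses
qemu_fclose()):

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-ins for illustration only. */
    typedef struct QEMUFileStub {
        FILE *fp;
    } QEMUFileStub;

    typedef struct MigrationStateStub {
        QEMUFileStub *to_dst_file;
    } MigrationStateStub;

    /* Drop the reference held in the global migration state before the file
     * object is freed, so that a later command looking at to_dst_file sees
     * NULL instead of a dangling pointer. */
    static void savevm_finish(MigrationStateStub *ms, QEMUFileStub *f)
    {
        ms->to_dst_file = NULL;
        free(f); /* stands in for qemu_fclose()/g_free() */
    }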

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170225193155.447462-5-vsementsov@virtuozzo.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
5f9412bbac migration: Update docs to discourage version bumps
Version bumps break backwards migration; update the docs
to explain to people why that's bad and how to avoid it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170210110359.8210-1-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Marc-André Lureau
128e4e1089 migration: fix id leak regression
This leak was introduced in commit
581f08bac2.

(it stands out quickly with ASAN once the rest of the leaks are also
removed from make check with this series)

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Juan Quintela <quintela@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170221141451.28305-31-marcandre.lureau@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Ashijeet Acharya
7562f90707 migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
Commit a3a3d8c7 introduced a segfault bug when checking
'dc->vmsd->unmigratable', which caused QEMU to crash when trying to add
devices that have not yet set their 'dc->vmsd' during initialization.
Place a 'dc->vmsd' check before it so that we do not segfault for
such devices.

NOTE: This doesn't compromise the functioning of the --only-migratable
option, as all the unmigratable devices do set their 'dc->vmsd'.

Introduce a new function check_migratable() and move the
only_migratable check inside it, also use stubs to avoid user-mode qemu
build failures.

Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
Message-Id: <1487009088-23891-1-git-send-email-ashijeetacharya@gmail.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Laurent Vivier
9cd49026aa vmstate-static-checker: update white list with spapr_pci
To fix migration between 2.7 and 2.8, some fields have
been renamed and managed with the help of a PHB property
(pre_2_8_migration):

    5c4537b spapr: Fix 2.7<->2.8 migration of PCI host bridge

So we need to add them to the white list:

    dma_liobn[0],
    mem_win_addr, mem_win_size,
    io_win_addr, io_win_size

become

    mig_liobn,
    mig_mem_win_addr, mig_mem_win_size,
    mig_io_win_addr, mig_io_win_size

CC: David Gibson <david@gibson.dropbear.id.au>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Thomas Huth <thuth@redhat.com>
CC: Greg Kurz <groug@kaod.org>
CC: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170214133331.28997-1-lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Halil Pasic
4333309961 tests/test-vmstate.c: test array of ptr to primitive
Let's have a test for arrays of pointers to some primitive type, with
non-null and null pointers intermixed.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Message-Id: <20170222160119.52771-6-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Halil Pasic
cc95883185 tests/test-vmstate.c: test array of ptr with null
Add test for VMSTATE_ARRAY_OF_POINTER_TO_STRUCT with an array
containing some null pointer.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-5-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Fixed type cast in assert to uintptr_t rather than uint64_t
2017-02-28 11:30:06 +00:00
Halil Pasic
07d4e69147 migration/vmstate: fix array of ptr with nullptrs
Make VMS_ARRAY_OF_POINTER cope with null pointers. Previously the
reward for trying to migrate an array with some null pointers in it was
an illegal memory access, that is, a swift and painless death of the
process.  Let's make vmstate cope with this scenario.

The general approach is, when we encounter a null pointer (element),
instead of following the pointer to save/load the data behind it, we
save/load a placeholder. This way we can detect the case where a null
pointer was expected on the load side but non-null data was saved instead.
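
A toy sketch of the scheme (FILE-based and with a made-up marker value, not
QEMU's actual vmstate wire format):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NULLPTR_MARKER 0x30 /* made-up placeholder value for illustration */

    /* Save an array of pointers to uint32_t: for a NULL element emit a
     * one-byte placeholder instead of dereferencing it; non-NULL elements
     * are saved as before. */
    static void save_ptr_array(FILE *out, uint32_t *const *arr, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            if (arr[i] == NULL) {
                fputc(NULLPTR_MARKER, out);
            } else {
                fwrite(arr[i], sizeof(**arr), 1, out);
            }
        }
    }

    /* Load side: where the destination element is NULL, expect exactly the
     * placeholder; anything else means the source saved real data and the
     * two sides disagree. */
    static int load_ptr_array(FILE *in, uint32_t **arr, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            if (arr[i] == NULL) {
                if (fgetc(in) != NULLPTR_MARKER) {
                    return -1;
                }
            } else if (fread(arr[i], sizeof(**arr), 1, in) != 1) {
                return -1;
            }
        }
        return 0;
    }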

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Guenther Hutzl <hutzl@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-4-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Halil Pasic
cbfda0e6cf migration/vmstate: split up vmstate_base_addr
Currently vmstate_base_addr does several things: it pinpoints the field
within the struct, possibly allocates memory and possibly does the first
pointer dereference. Obviously allocation is needed only for load.

Let us split up the functionality of vmstate_base_addr and move the
address manipulations (that is, everything but the allocation logic) into
load and save so it becomes more obvious what is actually going on. This
way all the address calculations (and the handling of the flags
controlling them) are in one place and the sequence is more obvious.

The newly introduced function vmstate_handle_alloc also fixes the
allocation for the unused VMS_VBUFFER|VMS_MULTIPLY|VMS_ALLOC scenario
and is substantially simpler than the original vmstate_base_addr.

In load and save some asserts are added so it's easier to debug
situations where we would end up with a null pointer dereference.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-3-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Halil Pasic
e84641f73d migration/vmstate: renames in (load|save)_state
The vmstate_(load|save)_state functions start out with a void *opaque
pointing to some struct, and manipulate one or more elements of one field
within that struct.

First the field within the struct is pinpointed as opaque + offset; then,
if this is a pointer, it is dereferenced to obtain a pointer to
the first element of the vmstate field. Pointers to further elements, if
any, are calculated as first_element + i * element_size (where i is the
zero-based index of the element in question).
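
In code, the addressing described above amounts to something like this
(a sketch with illustrative parameter names, not the actual vmstate code):

    #include <stddef.h>

    /* Pinpoint the i-th element of a vmstate field inside an opaque struct:
     * the field lives at opaque + offset; if the field is declared as a
     * pointer, dereference it to reach the first element; element i is then
     * first_elem + i * element_size. */
    static void *field_element(void *opaque, size_t offset, int field_is_pointer,
                               size_t element_size, size_t i)
    {
        char *field = (char *)opaque + offset;
        char *first_elem = field_is_pointer ? *(char **)field : field;

        return first_elem + i * element_size;
    }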

Currently base_addr and addr are used as the variable names for the
pointer to the first element and the pointer to the current element being
processed. This is suboptimal because base_addr is somewhat
counter-intuitive (being obtained as base + offset) and both base_addr
and addr are not very descriptive (that we have a pointer should be clear
from the fact that it is declared as one).

Let's make things easier to understand by renaming base_addr to first_elem
and addr to curr_elem. This has the additional benefit of harmonizing
with other names within the scope (n_elems, vmstate_n_elems).

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-2-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Daniel Henrique Barboza
87c9cc1c30 Changing error message of QMP 'migrate_set_downtime' to seconds
Using QMP, the error message of 'migrate_set_downtime' was displaying
the value in milliseconds, which is misleading because the command
accepts the value in seconds:

{ "execute": "migrate_set_downtime", "arguments": {"value": 3000}}
{"error": {"class": "GenericError", "desc": "Parameter 'downtime_limit'
expects an integer in the range of 0 to 2000000 milliseconds"}}

This message is also seen in HMP when trying to set the same
parameter:

(qemu) migrate_set_parameter downtime-limit 3000000
Parameter 'downtime_limit' expects an integer in the range of 0 to
2000000 milliseconds

To allow for a proper error message when using QMP, validation
of the user input was added to 'qmp_migrate_set_downtime'.
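
A sketch of what such a check can look like (the real change lives in
qmp_migrate_set_downtime and reports through QEMU's Error API; the limit
below is derived from the 2000000 ms quoted in the error message above):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define MAX_DOWNTIME_SECONDS 2000 /* i.e. 2000000 milliseconds */

    /* Validate the downtime argument while it is still expressed in seconds,
     * so the error text can talk about seconds as well. */
    static bool downtime_seconds_valid(double value, char *err, size_t errlen)
    {
        if (value < 0 || value > MAX_DOWNTIME_SECONDS) {
            snprintf(err, errlen,
                     "Parameter 'downtime_limit' expects an integer in the "
                     "range of 0 to %d seconds", MAX_DOWNTIME_SECONDS);
            return false;
        }
        return true;
    }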

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Message-Id: <20170222151729.5812-1-danielhb@linux.vnet.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Cornelia Huck
e8ebf60f6d pc-bios/s390-ccw.img: rebuild image
Contains the following commits:
- pc-bios/s390-ccw: Use the ccw bios to start the network boot

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
99b72e0fbb pc-bios/s390-ccw: Use the ccw bios to start the network boot
We want to use the ccw bios to start the final network boot. To do
this we use the ccw bios to detect whether the boot device is a virtio
network device and to retrieve the start address of the
network boot image.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
f38b5b7fc4 s390x/ipl: Load network boot image
Load the network boot image into guest RAM when the boot
device selected is a network device. Use some of the reserved
space in IplBlockCcw to store the start address of the netboot
image.

A user could also use 'chreipl' (diag 308/5) to change the boot device.
So every time we update the IPLB, we need to check whether the selected
boot device is a network device so we can appropriately load the
network boot image.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
5f31ade055 s390x/ipl: Extend S390IPLState to support network boot
Add a new field to S390IPLState to store the name of the network boot
loader.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
34f1b23f8a elf-loader: Allow late loading of elf
The current QEMU ROM infrastructure rejects late loading of ROMs,
and since ELFs are currently loaded as ROMs, this prevents delayed loading
of ELFs. So when loading an ELF, allow the user to specify whether it
should be loaded as a ROM or not.

If an ELF is not loaded as a ROM, it is not restored on a
guest reboot/reset, and so it is up to the user to handle the reloading.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Greg Kurz
c23d5f1d5b 9pfs: local: drop unused code
Now that all the callbacks have been converted to use "at" syscalls, we
can drop this code.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a565fea565 9pfs: local: open2: don't follow symlinks
The local_open2() callback is vulnerable to symlink attacks because it
calls:

(1) open() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_open2() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively. Since local_open2() already opens
a descriptor to the target file, local_set_cred_passthrough() is
modified to reuse it instead of opening a new one.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to openat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
3f3a16990b 9pfs: local: mkdir: don't follow symlinks
The local_mkdir() callback is vulnerable to symlink attacks because it
calls:

(1) mkdir() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mkdir() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mkdirat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d815e72190 9pfs: local: mknod: don't follow symlinks
The local_mknod() callback is vulnerable to symlink attacks because it
calls:

(1) mknod() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mknod() to rely on opendir_nofollow() and
mknodat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

A new local_set_cred_passthrough() helper based on fchownat() and
fchmodat_nofollow() is introduced as a replacement to
local_post_create_passthrough() to fix (4).

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mknodat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
38771613ea 9pfs: local: symlink: don't follow symlinks
The local_symlink() callback is vulnerable to symlink attacks because it
calls:

(1) symlink() which follows symbolic links for all path elements but the
    rightmost one
(2) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(3) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(4) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_symlink() to rely on opendir_nofollow() and
symlinkat() to fix (1), openat(O_NOFOLLOW) to fix (2), as well as
local_set_xattrat() and local_set_mapped_file_attrat() to fix (3) and
(4) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d369f20763 9pfs: local: chown: don't follow symlinks
The local_chown() callback is vulnerable to symlink attacks because it
calls:

(1) lchown() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_chown() to rely on open_nofollow() and
fchownat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
e3187a45dd 9pfs: local: chmod: don't follow symlinks
The local_chmod() callback is vulnerable to symlink attacks because it
calls:

(1) chmod() which follows symbolic links for all path elements
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

We would need fchmodat() to implement AT_SYMLINK_NOFOLLOW to fix (1).
Unfortunately this isn't the case on Linux: the kernel doesn't even have a
flags argument to the syscall :-\ It is impossible to fix it in userspace
in a race-free manner. This patch hence converts local_chmod() to rely on
open_nofollow() and fchmod(). This fixes the vulnerability but introduces
a limitation: the target file must be readable and/or writable for the
call to openat() to succeed.
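
For reference, a reduced sketch of that approach (QEMU's real helper walks
every path component with O_NOFOLLOW, not just the last one, and handles
errors more carefully):

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>

    /* Change the mode of 'name' relative to a trusted directory fd without
     * following a symlink for the final component.  The file has to be
     * openable for reading or writing, which is the limitation mentioned
     * above. */
    static int chmod_nofollow(int dirfd, const char *name, mode_t mode)
    {
        int ret;
        int fd = openat(dirfd, name, O_RDONLY | O_NOFOLLOW);

        if (fd < 0) {
            fd = openat(dirfd, name, O_WRONLY | O_NOFOLLOW);
        }
        if (fd < 0) {
            return -1;
        }
        ret = fchmod(fd, mode);
        close(fd);
        return ret;
    }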

It introduces a local_set_xattrat() replacement to local_set_xattr()
based on fsetxattrat() to fix (2), and a local_set_mapped_file_attrat()
replacement to local_set_mapped_file_attr() based on local_fopenat()
and mkdirat() to fix (3). No effort is made to factor out code because
both local_set_xattr() and local_set_mapped_file_attr() will be dropped
when all users have been converted to use the "at" versions.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
ad0b46e6ac 9pfs: local: link: don't follow symlinks
The local_link() callback is vulnerable to symlink attacks because it calls:

(1) link() which follows symbolic links for all path elements but the
    rightmost one
(2) local_create_mapped_attr_dir()->mkdir() which follows symbolic links
    for all path elements but the rightmost one

This patch converts local_link() to rely on opendir_nofollow() and linkat()
to fix (1), mkdirat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
6dd4b1f1d0 9pfs: local: improve error handling in link op
When using the mapped-file security model, we also have to create a link
for the metadata file if it exists. In case of failure, we should rollback.

That's what this patch does.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d2767edec5 9pfs: local: rename: use renameat
The local_rename() callback is vulnerable to symlink attacks because it
uses rename() which follows symbolic links in all path elements but the
rightmost one.

This patch simply transforms local_rename() into a wrapper around
local_renameat() which is symlink-attack safe.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
99f2cf4b2d 9pfs: local: renameat: don't follow symlinks
The local_renameat() callback is currently a wrapper around local_rename()
which is vulnerable to symlink attacks.

This patch rewrites local_renameat() to have its own implementation, based
on local_opendir_nofollow() and renameat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
f9aef99b3e 9pfs: local: lstat: don't follow symlinks
The local_lstat() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) getxattr() which follows symbolic links in all path elements
(3) local_mapped_file_attr()->local_fopen()->openat(O_NOFOLLOW) which
    follows symbolic links in all path elements but the rightmost
    one

This patch converts local_lstat() to rely on opendir_nofollow() and
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1), fgetxattrat_nofollow() to
fix (2).

A new local_fopenat() helper is introduced as a replacement to
local_fopen() to fix (3). No effort is made to factor out code
because local_fopen() will be dropped when all users have been
converted to call local_fopenat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
bec1e9546e 9pfs: local: readlink: don't follow symlinks
The local_readlink() callback is vulnerable to symlink attacks because it
calls:

(1) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(2) readlink() which follows symbolic links for all path elements but the
    rightmost one

This patch converts local_readlink() to rely on open_nofollow() to fix (1)
and opendir_nofollow(), readlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
ac125d993b 9pfs: local: truncate: don't follow symlinks
The local_truncate() callback is vulnerable to symlink attacks because
it calls truncate() which follows symbolic links in all path elements.

This patch converts local_truncate() to rely on open_nofollow() and
ftruncate() instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
31e51d1c15 9pfs: local: statfs: don't follow symlinks
The local_statfs() callback is vulnerable to symlink attacks because it
calls statfs() which follows symbolic links in all path elements.

This patch converts local_statfs() to rely on open_nofollow() and fstatfs()
instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a33eda0dd9 9pfs: local: utimensat: don't follow symlinks
The local_utimensat() callback is vulnerable to symlink attacks because it
calls qemu_utimens()->utimensat(AT_SYMLINK_NOFOLLOW) which follows symbolic
links in all path elements but the rightmost one or qemu_utimens()->utimes()
which follows symbolic links for all path elements.

This patch converts local_utimensat() to rely on opendir_nofollow() and
utimensat(AT_SYMLINK_NOFOLLOW) directly instead of using qemu_utimens().
It is hence assumed that the OS supports utimensat(), i.e. has glibc 2.6
or higher and linux 2.6.22 or higher, which seems reasonable nowadays.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a0e640a872 9pfs: local: remove: don't follow symlinks
The local_remove() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) remove() which follows symbolic links in all path elements but the
    rightmost one

This patch converts local_remove() to rely on opendir_nofollow(),
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1) and unlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
df4938a665 9pfs: local: unlinkat: don't follow symlinks
The local_unlinkat() callback is vulnerable to symlink attacks because it
calls remove() which follows symbolic links in all path elements but the
rightmost one.

This patch converts local_unlinkat() to rely on opendir_nofollow() and
unlinkat() instead.

Most of the code is moved to a separate local_unlinkat_common() helper
which will be reused in a subsequent patch to fix the same issue in
local_remove().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
72f0d0bf51 9pfs: local: lremovexattr: don't follow symlinks
The local_lremovexattr() callback is vulnerable to symlink attacks because
it calls lremovexattr() which follows symbolic links in all path elements
but the rightmost one.

This patch introduces a helper to emulate the non-existing fremovexattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lremovexattr().

local_lremovexattr() is converted to use this helper and opendir_nofollow().
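
A sketch of the emulation (single-component file names assumed, as in the 9p
local backend; error handling trimmed):

    #include <limits.h>
    #include <stdio.h>
    #include <sys/xattr.h>

    /* Emulate the non-existing fremovexattrat(): build a path through
     * /proc/self/fd using an already-opened, trusted directory fd, so the
     * directory part of the path cannot be redirected by a guest-planted
     * symlink; lremovexattr() then refuses to follow a symlink in the
     * final component. */
    static int fremovexattrat_nofollow(int dirfd, const char *name,
                                       const char *xattr_name)
    {
        char proc_path[PATH_MAX];

        snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d/%s",
                 dirfd, name);
        return lremovexattr(proc_path, xattr_name);
    }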

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
3e36aba757 9pfs: local: lsetxattr: don't follow symlinks
The local_lsetxattr() callback is vulnerable to symlink attacks because
it calls lsetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fsetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lsetxattr().

local_lsetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
5507904e36 9pfs: local: llistxattr: don't follow symlinks
The local_llistxattr() callback is vulnerable to symlink attacks because
it calls llistxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing flistxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to llistxattr().

local_llistxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
56ad3e54da 9pfs: local: lgetxattr: don't follow symlinks
The local_lgetxattr() callback is vulnerable to symlink attacks because
it calls lgetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fgetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lgetxattr().

local_lgetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
996a0d76d7 9pfs: local: open/opendir: don't follow symlinks
The local_open() and local_opendir() callbacks are vulnerable to symlink
attacks because they call:

(1) open(O_NOFOLLOW) which follows symbolic links in all path elements but
    the rightmost one
(2) opendir() which follows symbolic links in all path elements

This patch converts both callbacks to use new helpers based on
openat_nofollow() to only open files and directories if they are
below the virtfs shared folder.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
0e35a37829 9pfs: local: keep a file descriptor on the shared folder
This patch opens the shared folder and caches the file descriptor, so that
it can be used to do symlink-safe path walk.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
6482a96163 9pfs: introduce relative_openat_nofollow() helper
When using the passthrough security mode, symbolic links created by the
guest are actual symbolic links on the host file system.

Since the resolution of symbolic links during path walk is supposed to
occur on the client side, the server should never receive any path
pointing to an actual symbolic link. This isn't guaranteed by the protocol
though, and malicious code in the guest can trick the server to issue
various syscalls on paths whose one or more elements are symbolic links.
In the case of the "local" backend using the "passthrough" or "none"
security modes, the guest can directly create symbolic links to arbitrary
locations on the host (as per spec). The "mapped-xattr" and "mapped-file"
security modes are also affected to a lesser extent as they require some
help from an external entity to create actual symbolic links on the host,
i.e. another guest using "passthrough" mode for example.

The current code hence relies on O_NOFOLLOW and "l*()" variants of system
calls. Unfortunately, this only applies to the rightmost path component.
A guest could maliciously replace any component in a trusted path with a
symbolic link. This could allow any guest to escape a virtfs shared folder.

This patch introduces a variant of the openat() syscall that successively
opens each path element with O_NOFOLLOW. When passing a file descriptor
pointing to a trusted directory, one is guaranteed to be returned a
file descriptor pointing to a path which is beneath the trusted directory.
This will be used by subsequent patches to implement symlink-safe path walk
for any access to the backend.

Symbolic links aren't the only threats actually: a malicious guest could
change a path element to point to other types of file with undesirable
effects:
- a named pipe or any other thing that would cause openat() to block
- a terminal device which would become QEMU's controlling terminal

These issues can be addressed with O_NONBLOCK and O_NOCTTY.

Two helpers are introduced: one to open intermediate path elements and one
to open the rightmost path element.
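
A condensed sketch of the walk (the real code distinguishes the flags used
for intermediate and final components more carefully and has fuller error
handling):

    #include <fcntl.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* Open 'path' relative to the trusted directory fd 'dirfd', opening one
     * component at a time with O_NOFOLLOW (plus O_NONBLOCK and O_NOCTTY to
     * avoid blocking on FIFOs or acquiring a controlling terminal).  The
     * returned fd is guaranteed to refer to something beneath 'dirfd'. */
    static int relative_openat_nofollow(int dirfd, const char *path, int flags)
    {
        int fd = dup(dirfd);
        char *dup_path = strdup(path);
        char *saveptr = NULL;
        char *comp;

        if (fd < 0 || dup_path == NULL) {
            free(dup_path);
            if (fd >= 0) {
                close(fd);
            }
            return -1;
        }

        comp = strtok_r(dup_path, "/", &saveptr);
        while (fd >= 0 && comp != NULL) {
            char *next = strtok_r(NULL, "/", &saveptr);
            /* intermediate components must be directories; the final one
             * gets the caller's flags */
            int comp_flags = next ? O_RDONLY | O_DIRECTORY : flags;
            int next_fd = openat(fd, comp,
                                 comp_flags | O_NOFOLLOW | O_NONBLOCK | O_NOCTTY);

            close(fd);
            fd = next_fd;
            comp = next;
        }

        free(dup_path);
        return fd; /* -1 if any component failed to open, e.g. was a symlink */
    }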

Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(renamed openat_nofollow() to relative_openat_nofollow(),
 assert path is relative and doesn't contain '//',
 fixed side-effect in assert, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 11:21:15 +01:00
Greg Kurz
21328e1e57 9pfs: remove side-effects in local_open() and local_opendir()
If these functions fail, they should not change *fs. Let's use local
variables to fix this.
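
The pattern, sketched with an illustrative stand-in for the fs handle (the
real structure used by the 9p local backend is different):

    #include <fcntl.h>

    /* Illustrative stand-in for the open-state handle. */
    struct fid_open_state {
        int fd;
    };

    /* Work on a local variable and publish into *fs only once everything
     * has succeeded, so a failure leaves *fs untouched. */
    static int open_no_side_effects(int dirfd, const char *name, int flags,
                                    struct fid_open_state *fs)
    {
        int fd = openat(dirfd, name, flags | O_NOFOLLOW);

        if (fd < 0) {
            return -1; /* *fs not modified on failure */
        }
        fs->fd = fd;
        return 0;
    }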

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Greg Kurz
00c90bd1c2 9pfs: remove side-effects in local_init()
If this function fails, it should not modify *ctx.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Greg Kurz
56fc494bdc 9pfs: local: move xattr security ops to 9p-xattr.c
These functions are always called indirectly. It really doesn't make sense
for them to sit in a header file.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Gerd Hoffmann
8779fccbef seabios: update to 1.10.2 release
git shortlog rel-1.10.1..rel-1.10.2
===================================

Ben Warren (5):
      QEMU DMA: Add DMA write capability
      romfile-loader: Switch to using named structs
      QEMU fw_cfg: Add command to write back address of file
      QEMU fw_cfg: Add functions for accessing files by key
      QEMU fw_cfg: Write fw_cfg back on S3 resume

Kevin O'Connor (1):
      ps2port: Disable keyboard/mouse prior to resetting ps2 controller

Ladi Prosek (1):
      ahci: Set upper 32-bit registers to zero

Paul Menzel (1):
      vgasrc: Increase debug level

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-28 09:54:23 +01:00
Eduardo Habkost
b8097deb35 i386: Improve query-cpu-model-expansion full mode
This keeps the same results for type=static expansion, but makes
type=full expansion return every single QOM property on the CPU
object that has a different value from the "base" CPU model,
plus all the CPU feature flag properties.

Cc: Jiri Denemark <jdenemar@redhat.com>
Message-Id: <20170222190029.17243-4-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:35 -03:00
Eduardo Habkost
f99fd7ca2a i386: Implement query-cpu-model-expansion QMP command
Implement query-cpu-model-expansion for target-i386.

This should meet all the requirements while being simple. In the
case of static expansion, it will use the new "base" CPU model,
and in the case of full expansion, it will keep the original CPU
model name+props, and append extra properties.

A future follow-up should improve the implementation of
type=full, so that it returns more detailed data, including every
writable QOM property in the CPU object.

Cc: libvir-list@redhat.com
Cc: Jiri Denemark <jdenemar@redhat.com>
Message-Id: <20170222190029.17243-3-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:31 -03:00
Eduardo Habkost
5adbed3088 i386: Define static "base" CPU model
The query-cpu-model-expand QMP command needs at least one static
model, to allow the "static" expansion mode to be implemented.
Instead of defining static versions of every CPU model, define a
"base" CPU model that has absolutely no feature flag enabled.

Despite having no CPUID data set at all, "-cpu base" is even a
functional CPU:

* It can boot a Slackware Linux 1.01 image with a Linux 0.99.12
  kernel[1].
* It is even possible to boot[2] a modern Fedora x86_64 guest by
  manually enabling the following CPU features:
  -cpu base,+lm,+msr,+pae,+fpu,+cx8,+cmov,+sse,+sse2,+fxsr

[1] http://www.qemu-advent-calendar.org/2014/#day-1
[2] This is what can be seen in the guest:
    [root@localhost ~]# cat /proc/cpuinfo
    processor       : 0
    vendor_id       : unknown
    cpu family      : 0
    model           : 0
    model name      : 00/00
    stepping        : 0
    physical id     : 0
    siblings        : 1
    core id         : 0
    cpu cores       : 1
    apicid          : 0
    initial apicid  : 0
    fpu             : yes
    fpu_exception   : yes
    cpuid level     : 1
    wp              : yes
    flags           : fpu msr pae cx8 cmov fxsr sse sse2 lm nopl
    bugs            :
    bogomips        : 5832.70
    clflush size    : 64
    cache_alignment : 64
    address sizes   : 36 bits physical, 48 bits virtual
    power management:

    [root@localhost ~]# x86info -v -a
    x86info v1.30.  Dave Jones 2001-2011
    Feedback to <davej@redhat.com>.

    No TSC, MHz calculation cannot be performed.
    Unknown vendor (0)
    MP Table:

    Family: 0 Model: 0 Stepping: 0
    CPU Model (x86info's best guess):

    eax in: 0x00000000, eax = 00000001 ebx = 00000000 ecx = 00000000 edx = 00000000
    eax in: 0x00000001, eax = 00000000 ebx = 00000800 ecx = 00000000 edx = 07008161

    eax in: 0x80000000, eax = 80000001 ebx = 00000000 ecx = 00000000 edx = 00000000
    eax in: 0x80000001, eax = 00000000 ebx = 00000000 ecx = 00000000 edx = 20000000

    Feature flags:
     fpu            Onboard FPU
     msr            Model-Specific Registers
     pae            Physical Address Extensions
     cx8            CMPXCHG8 instruction
     cmov           CMOV instruction
     fxsr           FXSAVE and FXRSTOR instructions
     sse            SSE support
     sse2           SSE2 support

    Long NOPs supported: yes

    Address sizes : 0 bits physical, 0 bits virtual
    0MHz processor (estimate).

     running at an estimated 0MHz
    [root@localhost ~]#

Message-Id: <20170222190029.17243-2-ehabkost@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:27 -03:00
Eduardo Habkost
0bacd8b304 i386: Don't set CPUClass::cpu_def on "max" model
Host CPUID info is used by the "max" CPU model only in KVM mode.
Move the initialization of CPUID data for "max" from class_init
to instance_init, and don't set CPUClass::cpu_def for "max".

Message-Id: <20170222183919.11928-4-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:25 -03:00
Eduardo Habkost
6900d1cc8a i386: Make "max" model not use any host CPUID info on TCG
Instead of reporting host CPUID data on "max", use the qemu64 CPU
model as reference to initialize CPUID
vendor/family/model/stepping/model-id.

Message-Id: <20170222183919.11928-3-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:23 -03:00
Eduardo Habkost
c62f2630f8 i386: Create "max" CPU model
Rename the existing "host" CPU model to "max", and set it to
kvm_enabled=false. The new "max" CPU model will be able to enable
all features supported by TCG out of the box, because its logic
is based on x86_cpu_get_supported_feature_word(), which already
works with TCG.

A new KVM-specific "host" class was added, that simply inherits
everything from "max" except the 'ordering' and 'description'
fields.

Message-Id: <20170222183919.11928-2-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:21 -03:00
Eduardo Habkost
a357a65b66 qapi-schema: Comment about full expansion of non-migration-safe models
Add a note warning that static expansion may not be 100% accurate
when the CPU model is not migration-safe. This will be the case
on x86 when expanding the "host" CPU model, because there are
"host" features that can't have a migration-safe representation
(e.g. "host-cache-info").

Message-Id: <20170116211124.29245-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:13 -03:00
Eduardo Habkost
b8d834a00f i386: Reorganize and document CPUID initialization steps
CPU runnability checks and CPU model expansion have slightly
different requirements. Document the steps involved in loading a
CPU model and realizing a CPU, so their requirements and purpose
are clearly defined.

This patch doesn't change any implementation. It just adds
comments, renames the x86_cpu_load_features() function for clarity
(so it won't be confused with x86_cpu_load_def()), and moves
x86_cpu_filter_features() closer to it.

Message-Id: <20170116211124.29245-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:09 -03:00
Eduardo Habkost
44bd8e5306 i386: Rename X86CPU::host_features to X86CPU::max_features
Rename the field and add a small comment to make its purpose
clearer.

Message-Id: <20170119210449.11991-4-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:06 -03:00
Eduardo Habkost
f48c883703 i386: Add ordering field to CPUClass
Instead of using kvm_enabled to order the "-cpu help" list, use a
new "ordering" field for that.

Message-Id: <20170119210449.11991-3-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:03 -03:00
Eduardo Habkost
771a13e90d i386: Unset cannot_destroy_with_object_finalize_yet on "host" model
The class is now safe because the assert(kvm_enabled()) line was
removed by commit e435601058.

Message-Id: <20170119210449.11991-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:22:58 -03:00
266 changed files with 8700 additions and 3375 deletions

View File

@@ -5,6 +5,8 @@ env:
TARGET_LIST=arm-softmmu,arm-linux-user
- IMAGE=debian-arm64-cross
TARGET_LIST=aarch64-softmmu,aarch64-linux-user
- IMAGE=debian-s390x-cross
TARGET_LIST=s390x-softmmu,s390x-linux-user
build:
pre_ci:
- make docker-image-${IMAGE}

View File

@@ -28,6 +28,7 @@
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "qemu/cutils.h"
#include "sysemu/replay.h"
#define AUDIO_CAP "audio"
#include "audio_int.h"
@@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void)
static void audio_reset_timer (AudioState *s)
{
if (audio_is_timer_needed ()) {
timer_mod (s->ts,
timer_mod_anticipate_ns(s->ts,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
}
else {
@@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s)
prev_rpos = hw->rpos;
played = hw->pcm_ops->run_out (hw, live);
replay_audio_out(&played);
if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
hw->rpos, hw->samples, played);
@@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s)
while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
SWVoiceIn *sw;
int captured, min;
int captured = 0, min;
captured = hw->pcm_ops->run_in (hw);
if (replay_mode != REPLAY_MODE_PLAY) {
captured = hw->pcm_ops->run_in(hw);
}
replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
min = audio_pcm_hw_find_min_in (hw);
hw->total_samples_captured += captured - min;

View File

@@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
bool audio_is_cleaning_up(void);
void audio_cleanup(void);
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right);
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right);
#endif /* QEMU_AUDIO_H */

View File

@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "audio.h"
#define AUDIO_CAP "mixeng"
@@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = {
}
};
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
*left = sample->l;
*right = sample->r;
#endif
}
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
sample->l = left;
sample->r = right;
#endif
}
/*
* August 21, 1998
* Copyright 1998 Fabrice Bellard.

View File

@@ -38,10 +38,14 @@
#define AUDIO_CAP "sdl"
#include "audio_int.h"
#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
typedef struct SDLVoiceOut {
HWVoiceOut hw;
int live;
#if USE_SEMAPHORE
int rpos;
#endif
int decr;
} SDLVoiceOut;
@@ -53,8 +57,10 @@ static struct {
static struct SDLAudioState {
int exit;
#if USE_SEMAPHORE
SDL_mutex *mutex;
SDL_sem *sem;
#endif
int initialized;
bool driver_created;
} glob_sdl;
@@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)
static int sdl_lock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_LockMutex (s->mutex)) {
sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_LockAudio();
#endif
return 0;
}
static int sdl_unlock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_UnlockMutex (s->mutex)) {
sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_UnlockAudio();
#endif
return 0;
}
static int sdl_post (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_SemPost (s->sem)) {
sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
return -1;
}
#endif
return 0;
}
#if USE_SEMAPHORE
static int sdl_wait (SDLAudioState *s, const char *forfn)
{
if (SDL_SemWait (s->sem)) {
@@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
}
return 0;
}
#endif
static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
{
@@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
int to_mix, decr;
/* dolog ("in callback samples=%d\n", samples); */
#if USE_SEMAPHORE
sdl_wait (s, "sdl_callback");
if (s->exit) {
return;
@@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
if (!sdl->live) {
goto again;
}
#else
if (s->exit || !sdl->live) {
break;
}
#endif
/* dolog ("in callback live=%d\n", live); */
to_mix = audio_MIN (samples, sdl->live);
@@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
/* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
hw->clip (buf, src, chunk);
#if USE_SEMAPHORE
sdl->rpos = (sdl->rpos + chunk) % hw->samples;
#else
hw->rpos = (hw->rpos + chunk) % hw->samples;
#endif
to_mix -= chunk;
buf += chunk << hw->info.shift;
}
@@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
sdl->live -= decr;
sdl->decr += decr;
#if USE_SEMAPHORE
again:
if (sdl_unlock (s, "sdl_callback")) {
return;
}
#endif
}
/* dolog ("done len=%d\n", len); */
#if (SDL_MAJOR_VERSION >= 2)
/* SDL2 does not clear the remaining buffer for us, so do it on our own */
if (samples) {
memset(buf, 0, samples << hw->info.shift);
}
#endif
}
static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
decr = audio_MIN (sdl->decr, live);
sdl->decr -= decr;
#if USE_SEMAPHORE
sdl->live = live - decr;
hw->rpos = sdl->rpos;
#else
sdl->live = live;
#endif
if (sdl->live > 0) {
sdl_unlock_and_post (s, "sdl_run_out");
@@ -405,6 +449,7 @@ static void *sdl_audio_init (void)
return NULL;
}
#if USE_SEMAPHORE
s->mutex = SDL_CreateMutex ();
if (!s->mutex) {
sdl_logerr ("Failed to create SDL mutex\n");
@@ -419,6 +464,7 @@ static void *sdl_audio_init (void)
SDL_QuitSubSystem (SDL_INIT_AUDIO);
return NULL;
}
#endif
s->driver_created = true;
return s;
@@ -428,8 +474,10 @@ static void sdl_audio_fini (void *opaque)
{
SDLAudioState *s = opaque;
sdl_close (s);
#if USE_SEMAPHORE
SDL_DestroySemaphore (s->sem);
SDL_DestroyMutex (s->mutex);
#endif
SDL_QuitSubSystem (SDL_INIT_AUDIO);
s->driver_created = false;
}

583
block.c
View File

@@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
return 0;
}
static char *bdrv_child_get_parent_desc(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
return g_strdup(bdrv_get_device_or_node_name(parent));
}
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
@@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_file = {
.get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_format = {
.get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_fmt_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
};
static void bdrv_backing_attach(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
BlockDriverState *backing_hd = c->bs;
assert(!parent->backing_blocker);
error_setg(&parent->backing_blocker,
"node is used as backing hd of '%s'",
bdrv_get_device_or_node_name(parent));
parent->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(parent->backing_file, sizeof(parent->backing_file),
backing_hd->filename);
pstrcpy(parent->backing_format, sizeof(parent->backing_format),
backing_hd->drv ? backing_hd->drv->format_name : "");
bdrv_op_block_all(backing_hd, parent->backing_blocker);
/* Otherwise we won't be able to commit or stream */
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
parent->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
parent->backing_blocker);
/*
* We do backup in 3 ways:
* 1. drive backup
* The target bs is new opened, and the source is top BDS
* 2. blockdev backup
* Both the source and the target are top BDSes.
* 3. internal backup(used for block replication)
* Both the source and the target are backing file
*
* In case 1 and 2, neither the source nor the target is the backing file.
* In case 3, we will block the top BDS, so there is only one block job
* for the top BDS and its backing chain.
*/
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
parent->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
parent->backing_blocker);
}
static void bdrv_backing_detach(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
assert(parent->backing_blocker);
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
error_free(parent->backing_blocker);
parent->backing_blocker = NULL;
}
/*
* Returns the options and flags that bs->backing should get, based on the
* given options and flags for the parent BDS
@@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
*child_flags = flags;
}
static const BdrvChildRole child_backing = {
const BdrvChildRole child_backing = {
.get_parent_desc = bdrv_child_get_parent_desc,
.attach = bdrv_backing_attach,
.detach = bdrv_backing_detach,
.inherit_options = bdrv_backing_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -1326,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename,
return 0;
}
static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
/*
* Check whether permissions on this node can be changed in a way that
* @cumulative_perms and @cumulative_shared_perms are the new cumulative
* permissions of all its parents. This involves checking whether all necessary
* permission changes to child nodes can be performed.
*
* A call to this function must always be followed by a call to bdrv_set_perm()
* or bdrv_abort_perm_update().
*/
static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
uint64_t cumulative_shared_perms, Error **errp)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
int ret;
/* Write permissions never work with read-only images */
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
bdrv_is_read_only(bs))
{
error_setg(errp, "Block node is read-only");
return -EPERM;
}
/* Check this node */
if (!drv) {
return 0;
}
if (drv->bdrv_check_perm) {
return drv->bdrv_check_perm(bs, cumulative_perms,
cumulative_shared_perms, errp);
}
/* Drivers that never have children can omit .bdrv_child_perm() */
if (!drv->bdrv_child_perm) {
assert(QLIST_EMPTY(&bs->children));
return 0;
}
/* Check all children */
QLIST_FOREACH(c, &bs->children, next) {
uint64_t cur_perm, cur_shared;
drv->bdrv_child_perm(bs, c, c->role,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp);
if (ret < 0) {
return ret;
}
}
return 0;
}
/*
* Notifies drivers that after a previous bdrv_check_perm() call, the
* permission update is not performed and any preparations made for it (e.g.
* taken file locks) need to be undone.
*
* This function recursively notifies all child nodes.
*/
static void bdrv_abort_perm_update(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
if (!drv) {
return;
}
if (drv->bdrv_abort_perm_update) {
drv->bdrv_abort_perm_update(bs);
}
QLIST_FOREACH(c, &bs->children, next) {
bdrv_child_abort_perm_update(c);
}
}
static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
uint64_t cumulative_shared_perms)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
if (!drv) {
return;
}
/* Update this node */
if (drv->bdrv_set_perm) {
drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
}
/* Drivers that never have children can omit .bdrv_child_perm() */
if (!drv->bdrv_child_perm) {
assert(QLIST_EMPTY(&bs->children));
return;
}
/* Update all children */
QLIST_FOREACH(c, &bs->children, next) {
uint64_t cur_perm, cur_shared;
drv->bdrv_child_perm(bs, c, c->role,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
bdrv_child_set_perm(c, cur_perm, cur_shared);
}
}
static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
uint64_t *shared_perm)
{
BdrvChild *c;
uint64_t cumulative_perms = 0;
uint64_t cumulative_shared_perms = BLK_PERM_ALL;
QLIST_FOREACH(c, &bs->parents, next_parent) {
cumulative_perms |= c->perm;
cumulative_shared_perms &= c->shared_perm;
}
*perm = cumulative_perms;
*shared_perm = cumulative_shared_perms;
}
static char *bdrv_child_user_desc(BdrvChild *c)
{
if (c->role->get_parent_desc) {
return c->role->get_parent_desc(c);
}
return g_strdup("another user");
}
static char *bdrv_perm_names(uint64_t perm)
{
struct perm_name {
uint64_t perm;
const char *name;
} permissions[] = {
{ BLK_PERM_CONSISTENT_READ, "consistent read" },
{ BLK_PERM_WRITE, "write" },
{ BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
{ BLK_PERM_RESIZE, "resize" },
{ BLK_PERM_GRAPH_MOD, "change children" },
{ 0, NULL }
};
char *result = g_strdup("");
struct perm_name *p;
for (p = permissions; p->name; p++) {
if (perm & p->perm) {
char *old = result;
result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name);
g_free(old);
}
}
return result;
}
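/*
 * Usage sketch (hypothetical caller, not part of the patch): the helper above
 * turns a permission mask into a human-readable list for error messages.
 */
static void example_report_missing_perms(BlockDriverState *bs, Error **errp)
{
    char *names = bdrv_perm_names(BLK_PERM_CONSISTENT_READ | BLK_PERM_RESIZE);
    /* names is now "consistent read, resize" */
    error_setg(errp, "Operation requires %s on node '%s'", names,
               bdrv_get_node_name(bs));
    g_free(names);
}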
/*
* Checks whether a new reference to @bs can be added if the new user requires
* @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set,
* that child's existing reference is ignored in the calculations; this allows checking
* permission updates for an existing reference.
*
* Needs to be followed by a call to either bdrv_set_perm() or
* bdrv_abort_perm_update(). */
static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
uint64_t new_shared_perm,
BdrvChild *ignore_child, Error **errp)
{
BdrvChild *c;
uint64_t cumulative_perms = new_used_perm;
uint64_t cumulative_shared_perms = new_shared_perm;
/* There is no reason why anyone couldn't tolerate write_unchanged */
assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c == ignore_child) {
continue;
}
if ((new_used_perm & c->shared_perm) != new_used_perm) {
char *user = bdrv_child_user_desc(c);
char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
error_setg(errp, "Conflicts with use by %s as '%s', which does not "
"allow '%s' on %s",
user, c->name, perm_names, bdrv_get_node_name(c->bs));
g_free(user);
g_free(perm_names);
return -EPERM;
}
if ((c->perm & new_shared_perm) != c->perm) {
char *user = bdrv_child_user_desc(c);
char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
error_setg(errp, "Conflicts with use by %s as '%s', which uses "
"'%s' on %s",
user, c->name, perm_names, bdrv_get_node_name(c->bs));
g_free(user);
g_free(perm_names);
return -EPERM;
}
cumulative_perms |= c->perm;
cumulative_shared_perms &= c->shared_perm;
}
return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp);
}
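/*
 * Worked example (made-up values) for the two conflict checks in the loop
 * above. Suppose an existing parent c of @bs has:
 *
 *   c->perm        = BLK_PERM_WRITE
 *   c->shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED
 *
 * A new user requesting new_used_perm = BLK_PERM_WRITE is rejected by the
 * first check, because c does not share 'write':
 *   (new_used_perm & c->shared_perm) != new_used_perm
 *
 * A new user requesting nothing but sharing only BLK_PERM_WRITE_UNCHANGED is
 * rejected by the second check, because c already uses 'write' and the new
 * user would not allow it:
 *   (c->perm & new_shared_perm) != c->perm
 */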
/* Needs to be followed by a call to either bdrv_child_set_perm() or
* bdrv_child_abort_perm_update(). */
int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
Error **errp)
{
return bdrv_check_update_perm(c->bs, perm, shared, c, errp);
}
void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
{
uint64_t cumulative_perms, cumulative_shared_perms;
c->perm = perm;
c->shared_perm = shared;
bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
&cumulative_shared_perms);
bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
}
void bdrv_child_abort_perm_update(BdrvChild *c)
{
bdrv_abort_perm_update(c->bs);
}
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
Error **errp)
{
int ret;
ret = bdrv_child_check_perm(c, perm, shared, errp);
if (ret < 0) {
bdrv_child_abort_perm_update(c);
return ret;
}
bdrv_child_set_perm(c, perm, shared);
return 0;
}
#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
| BLK_PERM_WRITE \
| BLK_PERM_WRITE_UNCHANGED \
| BLK_PERM_RESIZE)
#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
if (c == NULL) {
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
return;
}
*nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
(c->perm & DEFAULT_PERM_UNCHANGED);
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
(c->shared_perm & DEFAULT_PERM_UNCHANGED);
}
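/*
 * Worked example (hypothetical request) for the filter defaults above: only
 * the guest-visible (passthrough) bits are forwarded to the child, while the
 * node-internal bits stay shareable. For a first attach (c == NULL) with
 *
 *   perm   = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE
 *   shared = BLK_PERM_WRITE_UNCHANGED
 *
 * the filter requests on its own child:
 *
 *   *nperm   = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE
 *   *nshared = BLK_PERM_WRITE_UNCHANGED | DEFAULT_PERM_UNCHANGED
 */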
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
bool backing = (role == &child_backing);
assert(role == &child_backing || role == &child_file);
if (!backing) {
/* Apart from the modifications below, the same permissions are
* forwarded and left alone as for filters */
bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
/* Format drivers may touch metadata even if the guest doesn't write */
if (!bdrv_is_read_only(bs)) {
perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
}
/* bs->file always needs to be consistent because of the metadata. We
* can never allow other users to resize or write to it. */
perm |= BLK_PERM_CONSISTENT_READ;
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
} else {
/* We want consistent read from backing files if the parent needs it.
* No other operations are performed on backing files. */
perm &= BLK_PERM_CONSISTENT_READ;
/* If the parent can deal with changing data, we're okay with a
* writable and resizable backing file. */
/* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */
if (shared & BLK_PERM_WRITE) {
shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
} else {
shared = 0;
}
shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
BLK_PERM_WRITE_UNCHANGED;
}
*nperm = perm;
*nshared = shared;
}
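/*
 * Worked example (hypothetical request) for the format defaults above: a
 * guest device on a writable format node asks for
 *
 *   perm   = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE
 *   shared = BLK_PERM_WRITE_UNCHANGED
 *
 * For bs->file (role == &child_file, first attach):
 *   *nperm   = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE
 *              (the driver may touch metadata even without guest writes)
 *   *nshared = BLK_PERM_WRITE_UNCHANGED | DEFAULT_PERM_UNCHANGED
 *              (nobody else may write to or resize the image file)
 *
 * For bs->backing (role == &child_backing):
 *   *nperm   = BLK_PERM_CONSISTENT_READ
 *   *nshared = BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
 *              BLK_PERM_WRITE_UNCHANGED
 *              (WRITE and RESIZE would be shared too if the parent had
 *               shared WRITE)
 */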
static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs,
bool check_new_perm)
{
BlockDriverState *old_bs = child->bs;
uint64_t perm, shared_perm;
if (old_bs) {
if (old_bs->quiesce_counter && child->role->drained_end) {
child->role->drained_end(child);
}
if (child->role->detach) {
child->role->detach(child);
}
QLIST_REMOVE(child, next_parent);
/* Update permissions for old node. This is guaranteed to succeed
* because we're just taking a parent away, so we're loosening
* restrictions. */
bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
bdrv_check_perm(old_bs, perm, shared_perm, &error_abort);
bdrv_set_perm(old_bs, perm, shared_perm);
}
child->bs = new_bs;
@@ -1344,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
if (new_bs->quiesce_counter && child->role->drained_begin) {
child->role->drained_begin(child);
}
bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
if (check_new_perm) {
bdrv_check_perm(new_bs, perm, shared_perm, &error_abort);
}
bdrv_set_perm(new_bs, perm, shared_perm);
if (child->role->attach) {
child->role->attach(child);
}
}
}
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
void *opaque)
uint64_t perm, uint64_t shared_perm,
void *opaque, Error **errp)
{
BdrvChild *child = g_new(BdrvChild, 1);
BdrvChild *child;
int ret;
ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp);
if (ret < 0) {
bdrv_abort_perm_update(child_bs);
return NULL;
}
child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
.bs = NULL,
.name = g_strdup(child_name),
.role = child_role,
.opaque = opaque,
.bs = NULL,
.name = g_strdup(child_name),
.role = child_role,
.perm = perm,
.shared_perm = shared_perm,
.opaque = opaque,
};
bdrv_replace_child(child, child_bs);
/* This performs the matching bdrv_set_perm() for the above check. */
bdrv_replace_child(child, child_bs, false);
return child;
}
@@ -1368,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role)
const BdrvChildRole *child_role,
Error **errp)
{
BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
parent_bs);
BdrvChild *child;
uint64_t perm, shared_perm;
bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
assert(parent_bs->drv);
parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
perm, shared_perm, &perm, &shared_perm);
child = bdrv_root_attach_child(child_bs, child_name, child_role,
perm, shared_perm, parent_bs, errp);
if (child == NULL) {
return NULL;
}
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
return child;
}
@@ -1383,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child)
child->next.le_prev = NULL;
}
bdrv_replace_child(child, NULL);
bdrv_replace_child(child, NULL, false);
g_free(child->name);
g_free(child);
@@ -1447,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
* Sets the backing file link of a BDS. A new reference is created; callers
* which don't need their own reference any more must call bdrv_unref().
*/
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp)
{
if (backing_hd) {
bdrv_ref(backing_hd);
}
if (bs->backing) {
assert(bs->backing_blocker);
bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
bdrv_unref_child(bs, bs->backing);
} else if (backing_hd) {
error_setg(&bs->backing_blocker,
"node is used as backing hd of '%s'",
bdrv_get_device_or_node_name(bs));
}
if (!backing_hd) {
error_free(bs->backing_blocker);
bs->backing_blocker = NULL;
bs->backing = NULL;
goto out;
}
bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
bs->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
pstrcpy(bs->backing_format, sizeof(bs->backing_format),
backing_hd->drv ? backing_hd->drv->format_name : "");
bdrv_op_block_all(backing_hd, bs->backing_blocker);
/* Otherwise we won't be able to commit or stream */
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
bs->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
bs->backing_blocker);
/*
* We do backup in 3 ways:
* 1. drive backup
* The target bs is newly opened, and the source is the top BDS
* 2. blockdev backup
* Both the source and the target are top BDSes.
* 3. internal backup (used for block replication)
* Both the source and the target are backing files
*
* In cases 1 and 2, neither the source nor the target is the backing file.
* In case 3, we will block the top BDS, so there is only one block job
* for the top BDS and its backing chain.
*/
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
bs->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
bs->backing_blocker);
bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing,
errp);
if (!bs->backing) {
bdrv_unref(backing_hd);
}
out:
bdrv_refresh_limits(bs, NULL);
}
@@ -1580,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
/* Hook up the backing file link; drop our reference, bs owns the
* backing_hd reference now */
bdrv_set_backing_hd(bs, backing_hd);
bdrv_set_backing_hd(bs, backing_hd, &local_err);
bdrv_unref(backing_hd);
if (local_err) {
ret = -EINVAL;
goto free_exit;
}
qdict_del(parent_options, bdref_key);
@@ -1648,6 +2059,7 @@ BdrvChild *bdrv_open_child(const char *filename,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BdrvChild *c;
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -1656,7 +2068,13 @@ BdrvChild *bdrv_open_child(const char *filename,
return NULL;
}
return bdrv_attach_child(parent, bs, bdref_key, child_role);
c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
if (!c) {
bdrv_unref(bs);
return NULL;
}
return c;
}
static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
@@ -1669,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
int64_t total_size;
QemuOpts *opts = NULL;
BlockDriverState *bs_snapshot;
Error *local_err = NULL;
int ret;
/* if snapshot, we create a temporary backing file and open it
@@ -1718,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
* call bdrv_unref() on it), so in order to be able to return one, we have
* to increase bs_snapshot's refcount here */
bdrv_ref(bs_snapshot);
bdrv_append(bs_snapshot, bs);
bdrv_append(bs_snapshot, bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
}
g_free(tmp_filename);
return bs_snapshot;
@@ -1862,9 +2286,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto fail;
}
if (file_bs != NULL) {
file = blk_new();
blk_insert_bs(file, file_bs);
file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
blk_insert_bs(file, file_bs, &local_err);
bdrv_unref(file_bs);
if (local_err) {
goto fail;
}
qdict_put(options, "file",
qstring_from_str(bdrv_get_node_name(file_bs)));
@@ -2405,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs)
bs->drv->bdrv_close(bs);
bs->drv = NULL;
bdrv_set_backing_hd(bs, NULL);
bdrv_set_backing_hd(bs, NULL, &error_abort);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
@@ -2465,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from,
BdrvChild *c, *next, *to_c;
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
if (c->role->stay_at_node) {
continue;
}
if (c->role == &child_backing) {
/* @from is generally not allowed to be a backing file, except for
* when @to is the overlay. In that case, @from may not be replaced
* by @to as @to's backing node. */
/* If @from is a backing file of @to, ignore the child to avoid
* creating a loop. We only want to change the pointer of other
* parents. */
QLIST_FOREACH(to_c, &to->children, next) {
if (to_c == c) {
break;
@@ -2479,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from,
}
}
assert(c->role != &child_backing);
bdrv_ref(to);
bdrv_replace_child(c, to);
/* FIXME Are we sure that bdrv_replace_child() can't run into
* &error_abort because of permissions? */
bdrv_replace_child(c, to, true);
bdrv_unref(from);
}
}
@@ -2502,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from,
* parents of bs_top after bdrv_append() returns. If the caller needs to keep a
* reference of its own, it must call bdrv_ref().
*/
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
{
assert(!bdrv_requests_pending(bs_top));
assert(!bdrv_requests_pending(bs_new));
Error *local_err = NULL;
bdrv_ref(bs_top);
assert(!atomic_read(&bs_top->in_flight));
assert(!atomic_read(&bs_new->in_flight));
bdrv_set_backing_hd(bs_new, bs_top, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto out;
}
change_parent_backing_link(bs_top, bs_new);
bdrv_set_backing_hd(bs_new, bs_top);
bdrv_unref(bs_top);
/* bs_new is now referenced by its new parents, we don't need the
* additional reference any more. */
out:
bdrv_unref(bs_new);
}
@@ -2658,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base, const char *backing_file_str)
{
BlockDriverState *new_top_bs = NULL;
Error *local_err = NULL;
int ret = -EIO;
if (!top->drv || !base->drv) {
@@ -2690,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
if (ret) {
goto exit;
}
bdrv_set_backing_hd(new_top_bs, base);
bdrv_set_backing_hd(new_top_bs, base, &local_err);
if (local_err) {
ret = -EPERM;
error_report_err(local_err);
goto exit;
}
ret = 0;
exit:
@@ -2705,6 +3149,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int ret;
assert(child->perm & BLK_PERM_RESIZE);
if (!drv)
return -ENOMEDIUM;
if (!drv->bdrv_truncate)

View File

@@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
job = block_job_create(job_id, &backup_job_driver, bs, speed,
creation_flags, cb, opaque, errp);
/* job->common.len is fixed, so we can't allow resize */
job = block_job_create(job_id, &backup_job_driver, bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
speed, creation_flags, cb, opaque, errp);
if (!job) {
goto error;
}
job->target = blk_new();
blk_insert_bs(job->target, target);
/* The target must match the source in size, so no resize here either */
job->target = blk_new(BLK_PERM_WRITE,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
ret = blk_insert_bs(job->target, target, errp);
if (ret < 0) {
goto error;
}
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
@@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
block_job_add_bdrv(&job->common, target);
/* Required permissions are already taken with target's blk_new() */
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
job->common.len = len;
block_job_txn_add_job(txn, &job->common);

View File

@@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_reopen_prepare = blkdebug_reopen_prepare,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,

View File

@@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_file_open = blkreplay_open,
.bdrv_close = blkreplay_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkreplay_getlength,
.bdrv_co_preadv = blkreplay_co_preadv,

View File

@@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = {
.bdrv_parse_filename = blkverify_parse_filename,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkverify_getlength,
.bdrv_refresh_filename = blkverify_refresh_filename,

View File

@@ -59,6 +59,9 @@ struct BlockBackend {
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
uint64_t perm;
uint64_t shared_perm;
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
@@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
static char *blk_root_get_parent_desc(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
char *dev_id;
if (blk->name) {
return g_strdup(blk->name);
}
dev_id = blk_get_attached_dev_id(blk);
if (*dev_id) {
return dev_id;
} else {
/* TODO Callback into the BB owner for something more detailed */
g_free(dev_id);
return g_strdup("a block device");
}
}
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
@@ -110,6 +133,7 @@ static const BdrvChildRole child_root = {
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
.get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
@@ -117,15 +141,23 @@ static const BdrvChildRole child_root = {
/*
* Create a new BlockBackend with a reference count of one.
* Store an error through @errp on failure, unless it's null.
*
* @perm is a bitmask of BLK_PERM_* constants which describes the permissions
* to request for a block driver node that is attached to this BlockBackend.
* @shared_perm is a bitmask which describes which permissions may be granted
* to other users of the attached node.
* Both sets of permissions can be changed later using blk_set_perm().
*
* Return the new BlockBackend on success, null on failure.
*/
BlockBackend *blk_new(void)
BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
blk->perm = perm;
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
@@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
{
BlockBackend *blk;
BlockDriverState *bs;
uint64_t perm;
blk = blk_new();
/* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a concern because the BDS stays private, so we
* just request permission according to the flags.
*
* The exceptions are xen_disk and blockdev_init(); in these cases, the
* caller of blk_new_open() doesn't make use of the permissions, but they
* shouldn't hurt either. We can still share everything here because the
* guest devices will add their own blockers if they can't share. */
perm = BLK_PERM_CONSISTENT_READ;
if (flags & BDRV_O_RDWR) {
perm |= BLK_PERM_WRITE;
}
if (flags & BDRV_O_RESIZE) {
perm |= BLK_PERM_RESIZE;
}
blk = blk_new(perm, BLK_PERM_ALL);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
return NULL;
}
blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
perm, BLK_PERM_ALL, blk, &error_abort);
return blk;
}
@@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk)
/*
* Associates a new BlockDriverState with @blk.
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
blk->perm, blk->shared_perm, blk, errp);
if (blk->root == NULL) {
return -EPERM;
}
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
return 0;
}
/*
* Sets the permission bitmasks that the user of the BlockBackend needs.
*/
int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp)
{
int ret;
if (blk->root) {
ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
if (ret < 0) {
return ret;
}
}
blk->perm = perm;
blk->shared_perm = shared_perm;
return 0;
}
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
*perm = blk->perm;
*shared_perm = blk->shared_perm;
}
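/*
 * Caller sketch (hypothetical device code, not part of the patch) for the
 * permission-aware BlockBackend API introduced here: declare the needed and
 * shareable bits when creating the BlockBackend, attach a node, and adjust
 * the permissions later with blk_set_perm().
 */
static BlockBackend *example_attach(BlockDriverState *bs, Error **errp)
{
    /* Needs read and write access, lets other users do anything */
    BlockBackend *blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
                                BLK_PERM_ALL);
    if (blk_insert_bs(blk, bs, errp) < 0) {
        blk_unref(blk);
        return NULL;
    }
    /* Permissions can be dropped (or widened) again at any time */
    if (blk_set_perm(blk, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL, errp) < 0) {
        blk_unref(blk);
        return NULL;
    }
    return blk;
}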
static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
blk->guest_block_size = 512;
blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
@@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
/*
* Notify @blk's attached device model of media change.
* If @load is true, notify of media load.
* Else, notify of media eject.
*
* If @load is true, notify of media load. This action can fail, meaning that
* the medium cannot be loaded. @errp is set then.
*
* If @load is false, notify of media eject. This can never fail.
*
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
Error *local_err = NULL;
assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
blk->dev_ops->change_media_cb(blk->dev_opaque, load);
blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
if (local_err) {
assert(load == true);
error_propagate(errp, local_err);
return;
}
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
@@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
static void blk_root_change_media(BdrvChild *child, bool load)
{
blk_dev_change_media_cb(child->opaque, load);
blk_dev_change_media_cb(child->opaque, load, NULL);
}
/*

View File

@@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = bochs_refresh_limits,
.bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,

View File

@@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = cloop_refresh_limits,
.bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,

View File

@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *active;
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockdevOnError on_error;
@@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque)
BlockDriverState *active = s->active;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
int ret = data->ret;
bool remove_commit_top_bs = false;
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
blk_unref(s->base);
if (!block_job_is_cancelled(&s->common) && ret == 0) {
/* success */
ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
s->backing_file_str);
} else if (overlay_bs) {
/* XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */
remove_commit_top_bs = true;
}
/* restore base open flags here if appropriate (e.g., change the base back
@@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque)
}
g_free(s->backing_file_str);
blk_unref(s->top);
blk_unref(s->base);
block_job_completed(&s->common, ret);
g_free(data);
/* If bdrv_drop_intermediate() didn't already do that, remove the commit
* filter driver from the backing chain. Do this as the final step so that
* the 'consistent read' permission can be granted. */
if (remove_commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
}
static void coroutine_fn commit_run(void *opaque)
@@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = {
.start = commit_run,
};
static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
static void bdrv_commit_top_close(BlockDriverState *bs)
{
}
static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
*nperm = 0;
*nshared = BLK_PERM_ALL;
}
/* Dummy node that provides consistent read to its users without requiring it
* from its backing file and that allows writes on the backing file chain. */
static BlockDriver bdrv_commit_top = {
.format_name = "commit_top",
.bdrv_co_preadv = bdrv_commit_top_preadv,
.bdrv_close = bdrv_commit_top_close,
.bdrv_child_perm = bdrv_commit_top_child_perm,
};
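/*
 * Illustrative graph (hypothetical chain) while the commit job runs: the
 * filter is inserted between the overlay and the node being committed,
 *
 *   overlay_bs -> commit_top_bs -> top -> ... -> base
 *
 * Because bdrv_commit_top_child_perm() requests nothing (*nperm = 0) and
 * shares everything (*nshared = BLK_PERM_ALL), parents above keep their
 * 'consistent read' satisfied by the filter itself while the job is free to
 * write to base underneath.
 */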
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
Error **errp)
const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
@@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *overlay_bs;
BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
int ret;
assert(top != bs);
if (top == base) {
@@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
}
s = block_job_create(job_id, &commit_job_driver, bs, speed,
BLOCK_JOB_DEFAULT, NULL, NULL, errp);
s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
@@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs,
bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
block_job_unref(&s->common);
return;
goto fail;
}
}
/* Insert commit_top block node above top, so we can block consistent read
* on the backing chain below it */
commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
errp);
if (commit_top_bs == NULL) {
goto fail;
}
bdrv_set_backing_hd(commit_top_bs, top, &error_abort);
bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort);
s->commit_top_bs = commit_top_bs;
bdrv_unref(commit_top_bs);
/* Block all nodes between top and base, because they will
* disappear from the chain after this operation. */
assert(bdrv_chain_contains(top, base));
for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
for (iter = top; iter != base; iter = backing_bs(iter)) {
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
* at s->base (if writes are blocked for a node, they are also blocked
* for its backing file). The other option would be a second filter
* driver above s->base. */
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
errp);
if (ret < 0) {
goto fail;
}
}
ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
}
/* overlay_bs must be blocked because it needs to be modified to
* update the backing image string, but if it's the root node then
* don't block it again */
if (bs != overlay_bs) {
block_job_add_bdrv(&s->common, overlay_bs);
* update the backing image string. */
ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
}
s->base = blk_new();
blk_insert_bs(s->base, base);
s->base = blk_new(BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_RESIZE,
BLK_PERM_CONSISTENT_READ
| BLK_PERM_GRAPH_MOD
| BLK_PERM_WRITE_UNCHANGED);
ret = blk_insert_bs(s->base, base, errp);
if (ret < 0) {
goto fail;
}
s->top = blk_new();
blk_insert_bs(s->top, top);
/* Required permissions are already taken with block_job_add_bdrv() */
s->top = blk_new(0, BLK_PERM_ALL);
ret = blk_insert_bs(s->top, top, errp);
if (ret < 0) {
goto fail;
}
s->active = bs;
@@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
trace_commit_start(bs, base, top, s);
block_job_start(&s->common);
return;
fail:
if (s->base) {
blk_unref(s->base);
}
if (s->top) {
blk_unref(s->top);
}
if (commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
block_job_unref(&s->common);
}
@@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int bdrv_commit(BlockDriverState *bs)
{
BlockBackend *src, *backing;
BlockDriverState *backing_file_bs = NULL;
BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int ret = 0;
uint8_t *buf = NULL;
Error *local_err = NULL;
if (!drv)
return -ENOMEDIUM;
@@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs)
}
}
src = blk_new();
blk_insert_bs(src, bs);
src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
backing = blk_new();
blk_insert_bs(backing, bs->backing->bs);
ret = blk_insert_bs(src, bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto ro_cleanup;
}
/* Insert commit_top block node above backing, so we can write to it */
backing_file_bs = backing_bs(bs);
commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
&local_err);
if (commit_top_bs == NULL) {
error_report_err(local_err);
goto ro_cleanup;
}
bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
ret = blk_insert_bs(backing, backing_file_bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto ro_cleanup;
}
length = blk_getlength(src);
if (length < 0) {
@@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs)
ro_cleanup:
qemu_vfree(buf);
blk_unref(src);
blk_unref(backing);
if (backing_file_bs) {
bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
}
bdrv_unref(commit_top_bs);
blk_unref(src);
if (ro) {
/* ignoring error return here */

View File

@@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_probe = block_crypto_probe_luks,
.bdrv_open = block_crypto_open_luks,
.bdrv_close = block_crypto_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = block_crypto_create_luks,
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,

View File

@@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = {
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_refresh_limits = dmg_refresh_limits,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};

View File

@@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
BlockDriverState *bs = child->bs;
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
* modifying the image file. This is critical for zero-copy guest I/O
@@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
size_t skip_bytes;
int ret;
assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
@@ -1001,10 +1005,11 @@ err:
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
int ret = 0;
uint64_t bytes_remaining = bytes;
@@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (!ret || pnum != nb_sectors) {
ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
}
@@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
}
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
tracked_request_end(&req);
@@ -1306,10 +1311,11 @@ fail:
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
bool waited;
int ret;
@@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
assert(child->perm & BLK_PERM_WRITE);
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
@@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
return ret;
}
static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
int64_t offset,
unsigned int bytes,
BdrvRequestFlags flags,
BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
@@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
@@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
if (bytes >= align) {
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
@@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, req, offset, align,
ret = bdrv_aligned_preadv(child, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
@@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (!qiov) {
ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
goto out;
}
@@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
align, &tail_qiov, 0);
ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
align, align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
@@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
bytes = ROUND_UP(bytes, align);
}
ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);

View File

@@ -38,7 +38,10 @@ typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockBackend *target;
BlockDriverState *mirror_top_bs;
BlockDriverState *source;
BlockDriverState *base;
/* The name of the graph node to replace */
char *replaces;
/* The BDS to replace */
@@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = blk_bs(s->common.blk);
BlockDriverState *source = s->source;
int64_t sector_num, first_chunk;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
@@ -497,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
BlockDriverState *src = blk_bs(s->common.blk);
BlockDriverState *src = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
bdrv_ref(src);
bdrv_ref(mirror_top_bs);
/* We don't access the source any more. Dropping any WRITE/RESIZE is
* required before it could become a backing file of target_bs. */
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target_bs) != backing) {
bdrv_set_backing_hd(target_bs, backing, &local_err);
if (local_err) {
error_report_err(local_err);
data->ret = -EPERM;
}
}
}
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -524,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque)
bdrv_drained_begin(target_bs);
bdrv_replace_in_backing_chain(to_replace, target_bs);
bdrv_drained_end(target_bs);
/* We just changed the BDS the job BB refers to */
blk_remove_bs(job->blk);
blk_insert_bs(job->blk, src);
}
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -540,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque)
g_free(s->replaces);
blk_unref(s->target);
s->target = NULL;
/* Remove the mirror filter driver from the graph. Before this, get rid of
* the blockers on the intermediate nodes so that the resulting state is
* valid. */
block_job_remove_all_bdrv(job);
bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
/* We just changed the BDS the job BB refers to (with either or both of the
* bdrv_replace_in_backing_chain() calls), so switch the BB back so the
* cleanup does the right thing. We don't need any permissions any more
* now. */
blk_remove_bs(job->blk);
blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
block_job_completed(&s->common, data->ret);
g_free(data);
bdrv_drained_end(src);
bdrv_unref(mirror_top_bs);
bdrv_unref(src);
}
@@ -562,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t sector_num, end;
BlockDriverState *base = s->base;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
@@ -644,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
int64_t length;
@@ -876,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
BlockDriverState *src, *target;
BlockDriverState *target;
src = blk_bs(job->blk);
target = blk_bs(s->target);
if (!s->synced) {
@@ -910,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp)
replace_aio_context = bdrv_get_aio_context(s->to_replace);
aio_context_acquire(replace_aio_context);
/* TODO Translate this into permission system. Current definition of
* GRAPH_MOD would require requesting it for the parents; they might
* not even be BlockDriverStates, however, so a BdrvChild can't address
* them. May need redefinition of GRAPH_MOD. */
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -918,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
aio_context_release(replace_aio_context);
}
if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target) != backing) {
bdrv_set_backing_hd(target, backing);
}
}
s->should_complete = true;
block_job_enter(&s->common);
}
@@ -980,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = {
.drain = mirror_drain,
};
static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
}
static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->backing->bs);
}
static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file)
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
}
static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
return bdrv_co_pdiscard(bs->backing->bs, offset, count);
}
static void bdrv_mirror_top_close(BlockDriverState *bs)
{
}
static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
/* Must be able to forward guest writes to the real image */
*nperm = 0;
if (perm & BLK_PERM_WRITE) {
*nperm |= BLK_PERM_WRITE;
}
*nshared = BLK_PERM_ALL;
}
/* Dummy node that provides consistent read to its users without requiring it
* from its backing file and that allows writes on the backing file chain. */
static BlockDriver bdrv_mirror_top = {
.format_name = "mirror_top",
.bdrv_co_preadv = bdrv_mirror_top_preadv,
.bdrv_co_pwritev = bdrv_mirror_top_pwritev,
.bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
.bdrv_co_flush = bdrv_mirror_top_flush,
.bdrv_co_get_block_status = bdrv_mirror_top_get_block_status,
.bdrv_close = bdrv_mirror_top_close,
.bdrv_child_perm = bdrv_mirror_top_child_perm,
};
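/*
 * Worked example (hypothetical parent request) for the mirror filter: unlike
 * commit_top, it must keep guest writes flowing to the real image. If a
 * parent requests perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE on
 * mirror_top_bs, bdrv_mirror_top_child_perm() requests on bs->backing:
 *
 *   *nperm   = BLK_PERM_WRITE      (only the write bit is forwarded)
 *   *nshared = BLK_PERM_ALL        (the filter itself tolerates everything)
 */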
static void mirror_start_job(const char *job_id, BlockDriverState *bs,
int creation_flags, BlockDriverState *target,
const char *replaces, int64_t speed,
@@ -992,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
void *opaque, Error **errp,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base,
bool auto_complete)
bool auto_complete, const char *filter_node_name)
{
MirrorBlockJob *s;
BlockDriverState *mirror_top_bs;
bool target_graph_mod;
bool target_is_backing;
Error *local_err = NULL;
int ret;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -1011,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
s = block_job_create(job_id, driver, bs, speed, creation_flags,
cb, opaque, errp);
if (!s) {
/* In the case of active commit, add dummy driver to provide consistent
* reads on the top, while disabling it in the intermediate nodes, and make
* the backing chain writable. */
mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
BDRV_O_RDWR, errp);
if (mirror_top_bs == NULL) {
return;
}
mirror_top_bs->total_sectors = bs->total_sectors;
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
* it alive until block_job_create() even if bs has no parent. */
bdrv_ref(mirror_top_bs);
bdrv_drained_begin(bs);
bdrv_append(mirror_top_bs, bs, &local_err);
bdrv_drained_end(bs);
if (local_err) {
bdrv_unref(mirror_top_bs);
error_propagate(errp, local_err);
return;
}
s->target = blk_new();
blk_insert_bs(s->target, target);
/* Make sure that the source is not resized while the job is running */
s = block_job_create(job_id, driver, mirror_top_bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
creation_flags, cb, opaque, errp);
bdrv_unref(mirror_top_bs);
if (!s) {
goto fail;
}
s->source = bs;
s->mirror_top_bs = mirror_top_bs;
/* No resize for the target either; while the mirror is still running, a
* consistent read isn't necessarily possible. We could possibly allow
* writes and graph modifications, though it would likely defeat the
* purpose of a mirror, so leave them blocked for now.
*
* In the case of active commit, things look a bit different, though,
* because the target is an already populated backing file in active use.
* We can allow anything except resize there. */
target_is_backing = bdrv_chain_contains(bs, target);
target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
(target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
BLK_PERM_WRITE_UNCHANGED |
(target_is_backing ? BLK_PERM_CONSISTENT_READ |
BLK_PERM_WRITE |
BLK_PERM_GRAPH_MOD : 0));
ret = blk_insert_bs(s->target, target, errp);
if (ret < 0) {
goto fail;
}
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
@@ -1041,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
return;
}
block_job_add_bdrv(&s->common, target);
/* Required permissions are already taken with blk_new() */
block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
/* In commit_active_start() all intermediate nodes disappear, so
* any jobs in them must be blocked */
if (bdrv_chain_contains(bs, target)) {
if (target_is_backing) {
BlockDriverState *iter;
for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block
* ourselves at s->base (if writes are blocked for a node, they are
* also blocked for its backing file). The other option would be a
* second filter driver above s->base (== target). */
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
errp);
if (ret < 0) {
goto fail;
}
}
}
trace_mirror_start(bs, s, opaque);
block_job_start(&s->common);
return;
fail:
if (s) {
g_free(s->replaces);
blk_unref(s->target);
block_job_unref(&s->common);
}
bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
}
void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1061,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap, Error **errp)
bool unmap, const char *filter_node_name, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
@@ -1075,12 +1251,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL, errp,
&mirror_job_driver, is_none_mode, base, false);
&mirror_job_driver, is_none_mode, base, false,
filter_node_name);
}
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
const char *filter_node_name,
BlockCompletionFunc *cb, void *opaque, Error **errp,
bool auto_complete)
{
@@ -1096,7 +1274,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque, &local_err,
&commit_active_job_driver, false, base, auto_complete);
&commit_active_job_driver, false, base, auto_complete,
filter_node_name);
if (local_err) {
error_propagate(errp, local_err);
goto error_restore_flags;

View File

@@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
}
file = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (file == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = {
.bdrv_probe = parallels_probe,
.bdrv_open = parallels_open,
.bdrv_close = parallels_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_get_block_status = parallels_co_get_block_status,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_flush_to_os = parallels_co_flush_to_os,

View File

@@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
qcow_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (qcow_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_probe = qcow_probe,
.bdrv_open = qcow_open,
.bdrv_close = qcow_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,

View File

@@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
blk = blk_new_open(filename, NULL, options,
BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
uint64_t cluster_size = s->cluster_size;
bool encrypt;
int refcount_bits = s->refcount_bits;
Error *local_err = NULL;
int ret;
QemuOptDesc *desc = opts->list->desc;
Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (new_size) {
BlockBackend *blk = blk_new();
blk_insert_bs(blk, bs);
BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
blk_unref(blk);
return ret;
}
ret = blk_truncate(blk, new_size);
blk_unref(blk);
if (ret < 0) {
return ret;
}
@@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_reopen_commit = qcow2_reopen_commit,
.bdrv_reopen_abort = qcow2_reopen_abort,
.bdrv_join_options = qcow2_join_options,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,

View File

@@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = {
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,

View File

@@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
/* We can safely add the child now */
bdrv_ref(child_bs);
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
if (child == NULL) {
s->next_child_index--;
bdrv_unref(child_bs);
goto out;
}
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
s->children[s->num_children++] = child;
out:
bdrv_drained_end(bs);
}
@@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
.bdrv_child_perm = bdrv_filter_default_perms,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};

View File

@@ -467,6 +467,7 @@ BlockDriver bdrv_raw = {
.bdrv_reopen_abort = &raw_reopen_abort,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_create = &raw_create,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,

View File

@@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
s->replication_state = BLOCK_REPLICATION_FAILOVER;
commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
replication_done, bs, errp, true);
NULL, replication_done, bs, errp, true);
break;
default:
aio_context_release(aio_context);
@@ -660,6 +660,7 @@ BlockDriver bdrv_replication = {
.bdrv_open = replication_open,
.bdrv_close = replication_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = replication_getlength,
.bdrv_co_readv = replication_co_readv,


@@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp)
int ret;
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (blk == NULL) {
ret = -EIO;
goto out_with_err_set;


@@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque)
StreamCompleteData *data = opaque;
BlockDriverState *bs = blk_bs(job->blk);
BlockDriverState *base = s->base;
Error *local_err = NULL;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
data->ret == 0) {
@@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque)
}
}
data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
bdrv_set_backing_hd(bs, base);
bdrv_set_backing_hd(bs, base, &local_err);
if (local_err) {
error_report_err(local_err);
data->ret = -EPERM;
goto out;
}
}
out:
/* Reopen the image back in read-only mode if necessary */
if (s->bs_flags != bdrv_get_flags(bs)) {
/* Give up write permissions before making it read-only */
blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
bdrv_reopen(bs, s->bs_flags, NULL);
}
@@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *iter;
int orig_bs_flags;
s = block_job_create(job_id, &stream_job_driver, bs, speed,
BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
/* Make sure that the image is opened in read-write mode */
orig_bs_flags = bdrv_get_flags(bs);
if (!(orig_bs_flags & BDRV_O_RDWR)) {
if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
block_job_unref(&s->common);
return;
}
}
/* Block all intermediate nodes between bs and base, because they
* will disappear from the chain after this operation */
/* Prevent concurrent jobs trying to modify the graph structure here, we
* already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached. */
s = block_job_create(job_id, &stream_job_driver, bs,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_GRAPH_MOD,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
goto fail;
}
/* Block all intermediate nodes between bs and base, because they will
* disappear from the chain after this operation. The streaming job reads
* every block only once, assuming that it doesn't change, so block writes
* and resizes. */
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
&error_abort);
}
s->base = base;
@@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
s->on_error = on_error;
trace_stream_start(bs, base, s);
block_job_start(&s->common);
return;
fail:
if (orig_bs_flags != bdrv_get_flags(bs)) {
bdrv_reopen(bs, s->bs_flags, NULL);
}
}


@@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = vdi_co_get_block_status,


@@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = {
.bdrv_open = vhdx_open,
.bdrv_close = vhdx_close,
.bdrv_reopen_prepare = vhdx_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
.bdrv_create = vhdx_create,


@@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
}
new_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (new_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,


@@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = {
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vpc_create,
.bdrv_co_preadv = vpc_co_preadv,


@@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
&error_abort);
*(void**) backing->opaque = s;
bdrv_set_backing_hd(s->bs, backing);
bdrv_set_backing_hd(s->bs, backing, &error_abort);
bdrv_unref(backing);
return 0;
@@ -3052,6 +3052,27 @@ err:
return ret;
}
static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
BDRVVVFATState *s = bs->opaque;
assert(c == s->qcow || role == &child_backing);
if (c == s->qcow) {
/* This is a private node, nobody should try to attach to it */
*nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
*nshared = BLK_PERM_WRITE_UNCHANGED;
} else {
/* The backing file is there so 'commit' can use it. vvfat doesn't
* access it in any way. */
*nperm = 0;
*nshared = BLK_PERM_ALL;
}
}
static void vvfat_close(BlockDriverState *bs)
{
BDRVVVFATState *s = bs->opaque;
@@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_file_open = vvfat_open,
.bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
.bdrv_child_perm = vvfat_child_perm,
.bdrv_co_preadv = vvfat_co_preadv,
.bdrv_co_pwritev = vvfat_co_pwritev,


@@ -558,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
if ((!file || !*file) && !qdict_size(bs_opts)) {
BlockBackendRootState *blk_rs;
blk = blk_new();
blk = blk_new(0, BLK_PERM_ALL);
blk_rs = blk_get_root_state(blk);
blk_rs->open_flags = bdrv_flags;
blk_rs->read_only = read_only;
@@ -1768,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common,
if (!state->new_bs->drv->supports_backing) {
error_setg(errp, "The snapshot does not support backing images");
return;
}
/* This removes our old bs and adds the new bs. This is an operation that
* can fail, so we need to do it in .prepare; undoing it for abort is
* always possible. */
bdrv_ref(state->new_bs);
bdrv_append(state->new_bs, state->old_bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
@@ -1778,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common)
bdrv_set_aio_context(state->new_bs, state->aio_context);
/* This removes our old bs and adds the new bs */
bdrv_append(state->new_bs, state->old_bs);
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
@@ -1794,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common)
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
if (state->new_bs) {
bdrv_unref(state->new_bs);
if (state->new_bs->backing) {
bdrv_replace_in_backing_chain(state->new_bs, state->old_bs);
}
}
}
@@ -1805,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common)
if (state->aio_context) {
bdrv_drained_end(state->old_bs);
aio_context_release(state->aio_context);
bdrv_unref(state->new_bs);
}
}
@@ -2311,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
}
if (!locked || force) {
blk_dev_change_media_cb(blk, false);
blk_dev_change_media_cb(blk, false, &error_abort);
}
if (locked && !force) {
@@ -2349,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
Error **errp)
{
BlockBackend *blk;
Error *local_err = NULL;
device = has_device ? device : NULL;
id = has_id ? id : NULL;
@@ -2372,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
return;
}
blk_dev_change_media_cb(blk, true);
blk_dev_change_media_cb(blk, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
@@ -2425,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
* called at all); therefore, the medium needs to be ejected here.
* Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. false). */
blk_dev_change_media_cb(blk, false);
blk_dev_change_media_cb(blk, false, &error_abort);
}
out:
@@ -2435,7 +2452,9 @@ out:
static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
BlockDriverState *bs, Error **errp)
{
Error *local_err = NULL;
bool has_device;
int ret;
/* For BBs without a device, we can exchange the BDS tree at will */
has_device = blk_get_attached_dev(blk);
@@ -2455,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
return;
}
blk_insert_bs(blk, bs);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
return;
}
if (!blk_dev_has_tray(blk)) {
/* For tray-less devices, blockdev-close-tray is a no-op (or may not be
@@ -2463,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
* slot here.
* Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. true). */
blk_dev_change_media_cb(blk, true);
blk_dev_change_media_cb(blk, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
blk_remove_bs(blk);
return;
}
}
}
@@ -2890,8 +2917,11 @@ void qmp_block_resize(bool has_device, const char *device,
goto out;
}
blk = blk_new();
blk_insert_bs(blk, bs);
blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
goto out;
}
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
@@ -3014,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_top, const char *top,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
bool has_filter_node_name, const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3029,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
if (!has_speed) {
speed = 0;
}
if (!has_filter_node_name) {
filter_node_name = NULL;
}
/* Important Note:
* libvirt relies on the DeviceNotFound error class in order to probe for
@@ -3103,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
goto out;
}
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
&local_err, false);
BLOCK_JOB_DEFAULT, speed, on_error,
filter_node_name, NULL, NULL, &local_err, false);
} else {
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3112,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
}
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
on_error, has_backing_file ? backing_file : NULL,
&local_err);
filter_node_name, &local_err);
}
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -3348,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_unmap, bool unmap,
bool has_filter_node_name,
const char *filter_node_name,
Error **errp)
{
@@ -3369,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_unmap) {
unmap = true;
}
if (!has_filter_node_name) {
filter_node_name = NULL;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3398,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
mirror_start(job_id, bs, target,
has_replaces ? replaces : NULL,
speed, granularity, buf_size, sync, backing_mode,
on_source_error, on_target_error, unmap, errp);
on_source_error, on_target_error, unmap, filter_node_name,
errp);
}
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3536,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_on_source_error, arg->on_source_error,
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
false, NULL,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@@ -3554,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_filter_node_name,
const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3585,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
true, true,
has_filter_node_name, filter_node_name,
&local_err);
error_propagate(errp, local_err);


@@ -55,6 +55,19 @@ struct BlockJobTxn {
static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
static char *child_job_get_parent_desc(BdrvChild *c)
{
BlockJob *job = c->opaque;
return g_strdup_printf("%s job '%s'",
BlockJobType_lookup[job->driver->job_type],
job->id);
}
static const BdrvChildRole child_job = {
.get_parent_desc = child_job_get_parent_desc,
.stay_at_node = true,
};
BlockJob *block_job_next(BlockJob *job)
{
if (!job) {
@@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque)
block_job_unref(job);
}
void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
void block_job_remove_all_bdrv(BlockJob *job)
{
job->nodes = g_slist_prepend(job->nodes, bs);
GSList *l;
for (l = job->nodes; l; l = l->next) {
BdrvChild *c = l->data;
bdrv_op_unblock_all(c->bs, job->blocker);
bdrv_root_unref_child(c);
}
g_slist_free(job->nodes);
job->nodes = NULL;
}
int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
uint64_t perm, uint64_t shared_perm, Error **errp)
{
BdrvChild *c;
c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
job, errp);
if (c == NULL) {
return -EPERM;
}
job->nodes = g_slist_prepend(job->nodes, c);
bdrv_ref(bs);
bdrv_op_block_all(bs, job->blocker);
return 0;
}
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
BlockDriverState *bs, int64_t speed, int flags,
BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
BlockBackend *blk;
BlockJob *job;
int ret;
if (bs->job) {
error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
@@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
}
}
blk = blk_new();
blk_insert_bs(blk, bs);
blk = blk_new(perm, shared_perm);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
blk_unref(blk);
return NULL;
}
job = g_malloc0(driver->instance_size);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
block_job_add_bdrv(job, bs);
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
@@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job)
void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
GSList *l;
BlockDriverState *bs = blk_bs(job->blk);
bs->job = NULL;
for (l = job->nodes; l; l = l->next) {
bs = l->data;
bdrv_op_unblock_all(bs, job->blocker);
bdrv_unref(bs);
}
g_slist_free(job->nodes);
block_job_remove_all_bdrv(job);
blk_remove_aio_context_notifier(job->blk,
block_job_attached_aio_context,
block_job_detach_aio_context, job);


@@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y
CONFIG_A9MPCORE=y
CONFIG_A15MPCORE=y
CONFIG_ARM_V7M=y
CONFIG_ARM_GIC=y
CONFIG_ARM_GIC_KVM=$(CONFIG_KVM)
CONFIG_ARM_TIMER=y


@@ -161,6 +161,11 @@ include/hw/hw.h.
=== More about versions ===
Version numbers are intended for major incompatible changes to the
migration of a device, and using them breaks backwards-migration
compatibility; in general most changes can be made by adding Subsections
(see below) or _TEST macros (see below) which won't break compatibility.
You can see that there are several version fields:
- version_id: the maximum version_id supported by VMState for that device.
@@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to
load state from minimum_version_id_old to minimum_version_id. This
function is deprecated and will be removed when no more users are left.
Saving state will always create a section with the 'version_id' value
and thus can't be loaded by any older QEMU.
=== Massaging functions ===
Sometimes, it is not enough to be able to save the state directly
@@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation
not enabled, the values in those fields are garbage and don't need to
be sent.
Using a condition function that checks a 'property' to determine whether
to send a subsection allows backwards migration compatibility when
new subsections are added.
For example (a code sketch follows below):
a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and
default it to true.
b) Add an entry to the HW_COMPAT_ for the previous version
that sets the property to false.
c) Add a static bool support_foo function that tests the property.
d) Add a subsection with a .needed set to the support_foo function
e) (potentially) Add a pre_load that sets up a default value for 'foo'
to be used if the subsection isn't loaded.
Now that subsection will not be generated when using an older
machine type and the migration stream will be accepted by older
QEMU versions. Pre-load functions can be used to initialise state
on the newer version so that they default to suitable values
when loading streams created by older QEMU versions that do not
generate the subsection.
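A minimal sketch of steps (a) to (d), with made-up names (support-foo,
foo_state_needed, MyDeviceState) used purely for illustration:

    static bool foo_state_needed(void *opaque)
    {
        MyDeviceState *s = opaque;
        /* Send the subsection only when the compat property is enabled */
        return s->support_foo;
    }

    static const VMStateDescription vmstate_foo = {
        .name = "mydevice/foo",
        .version_id = 1,
        .minimum_version_id = 1,
        .needed = foo_state_needed,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(foo, MyDeviceState),
            VMSTATE_END_OF_LIST()
        }
    };

    /* ...and hooked into the device's main VMStateDescription: */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_foo,
        NULL
    }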
In some cases subsections are added for data that had been accidentally
omitted by earlier versions; if the missing data causes the migration
process to succeed but the guest to behave badly then it may be better
to send the subsection and cause the migration to explicitly fail
with the unknown subsection error. If the bad behaviour only happens
with certain data values, making the subsection conditional on
the data value (rather than the machine type) allows migrations to succeed
in most cases. In general the preference is to tie the subsection to
the machine type, and allow reliable migrations, unless the behaviour
from omission of the subsection is really bad.
= Not sending existing elements =
Sometimes members of the VMState are no longer needed:
- removing them will break migration compatibility
- making them version dependent and bumping the version will break backwards
  migration compatibility.
The best way is to:
a) Add a new property/compatibility/function in the same way for subsections
above.
b) replace the VMSTATE macro with the _TEST version of the macro, e.g.:
VMSTATE_UINT32(foo, barstruct)
becomes
VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)
Sometime in the future when we no longer care about the ancient
versions these can be killed off.
= Return path =
In most migration scenarios there is only a single data path that runs
@@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests
such as this can happen as a page is sent at about the same time the
destination accesses it.
=== Postcopy with hugepages ===
Postcopy now works with hugetlbfs backed memory:
a) The Linux kernel on the destination must support userfault on hugepages.
b) The huge-page configuration on the source and destination VMs must be
identical; i.e. RAMBlocks on both sides must use the same page size.
c) Note that -mem-path /dev/hugepages will fall back to allocating normal
RAM if it doesn't have enough hugepages, triggering (b) to fail.
Using -mem-prealloc enforces the allocation using hugepages (see the example command line below).
d) Care should be taken with the size of hugepage used; postcopy with 2MB
hugepages works well, however 1GB hugepages are likely to be problematic
since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
and until the full page is transferred the destination thread is blocked.
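For instance (memory size and path purely illustrative), both source and
destination could be started with hugepage-backed guest RAM like this:

    qemu-system-x86_64 -m 4G -mem-path /dev/hugepages -mem-prealloc ...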


@@ -225,3 +225,10 @@ recording the virtual machine this filter puts all packets coming from
the outer world into the log. In replay mode packets from the log are
injected into the network device. All interactions with network backend
in replay mode are disabled.
Audio devices
-------------
Audio data is recorded and replayed automatically. The command line for recording
and replaying must contain identical specifications of audio hardware, e.g.:
-soundhw ac97
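For instance, a recording run and the matching replay run might look like
this (illustrative only; apart from rr=record/rr=replay the command lines
must be identical):

    qemu-system-i386 -icount shift=7,rr=record,rrfile=replay.bin -soundhw ac97 ...
    qemu-system-i386 -icount shift=7,rr=replay,rrfile=replay.bin -soundhw ac97 ...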

dtc

Submodule dtc updated: ec02b34c05...fa8bc7f928

exec.c

@@ -45,6 +45,12 @@
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <fcntl.h>
#include <linux/falloc.h>
#endif
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
@@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb)
return rb->page_size;
}
/* Returns the largest size of page in use */
size_t qemu_ram_pagesize_largest(void)
{
RAMBlock *block;
size_t largest = 0;
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
largest = MAX(largest, qemu_ram_pagesize(block));
}
return largest;
}
static int memory_try_enable_merging(void *addr, size_t len)
{
if (!machine_mem_merge(current_machine)) {
@@ -3294,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
rcu_read_unlock();
return ret;
}
/*
* Unmap pages of memory from start to start+length such that
* they a) read as 0, b) Trigger whatever fault mechanism
* the OS provides for postcopy.
* The pages must be unmapped by the end of the function.
* Returns: 0 on success, non-zero on failure
*
*/
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
{
int ret = -1;
uint8_t *host_startaddr = rb->host + start;
if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
error_report("ram_block_discard_range: Unaligned start address: %p",
host_startaddr);
goto err;
}
if ((start + length) <= rb->used_length) {
uint8_t *host_endaddr = host_startaddr + length;
if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
error_report("ram_block_discard_range: Unaligned end address: %p",
host_endaddr);
goto err;
}
errno = ENOTSUP; /* If we are missing MADVISE etc */
if (rb->page_size == qemu_host_page_size) {
#if defined(CONFIG_MADVISE)
/* Note: We need the madvise MADV_DONTNEED behaviour of definitely
* freeing the page.
*/
ret = madvise(host_startaddr, length, MADV_DONTNEED);
#endif
} else {
/* Huge page case - unfortunately it can't do DONTNEED, but
* it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
* huge page file.
*/
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
#endif
}
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to discard range "
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
}
} else {
error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
"/%zx/" RAM_ADDR_FMT")",
rb->idstr, start, length, rb->used_length);
}
err:
return ret;
}
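/* Illustrative (hypothetical) caller: discard a single, page-aligned page of
 * a RAMBlock, e.g. so that postcopy can re-populate it on demand:
 *
 *     ret = ram_block_discard_range(rb, offset, qemu_ram_pagesize(rb));
 *     if (ret < 0) {
 *         error handling; the range stays mapped
 *     }
 */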
#endif

hmp.c

@@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
const char* device = qdict_get_str(qdict, "device");
const char* command = qdict_get_str(qdict, "command");
Error *err = NULL;
int ret;
blk = blk_by_name(device);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
if (bs) {
blk = local_blk = blk_new();
blk_insert_bs(blk, bs);
blk = local_blk = blk_new(0, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, &err);
if (ret < 0) {
goto fail;
}
} else {
goto fail;
}
@@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
/*
* Notably absent: Proper permission management. This is sad, but it seems
* almost impossible to achieve without changing the semantics and thereby
* limiting the use cases of the qemu-io HMP command.
*
* In an ideal world we would unconditionally create a new BlockBackend for
* qemuio_command(), but we have commands like 'reopen' and want them to
* take effect on the exact BlockBackend whose name the user passed instead
* of just on a temporary copy of it.
*
* Another problem is that deleting the temporary BlockBackend involves
* draining all requests on it first, but some qemu-iotests cases want to
* issue multiple aio_read/write requests and expect them to complete in
* the background while the monitor has already returned.
*
* This is also what prevents us from saving the original permissions and
* restoring them later: We can't revoke permissions until all requests
* have completed, and we don't know when that is nor can we really let
* anything else run before we have revoked them to avoid race conditions.
*
* What happens now is that command() in qemu-io-cmds.c can extend the
* permissions if necessary for the qemu-io command. And they simply stay
* extended, possibly resulting in a read-only guest device keeping write
* permissions. Ugly, but it appears to be the lesser evil.
*/
qemuio_command(blk, command);
aio_context_release(aio_context);

File diff suppressed because it is too large.

hw/9pfs/9p-local.h (new file)

@@ -0,0 +1,20 @@
/*
* 9p local backend utilities
*
* Copyright IBM, Corp. 2017
*
* Authors:
* Greg Kurz <groug@kaod.org>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_9P_LOCAL_H
#define QEMU_9P_LOCAL_H
int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
mode_t mode);
int local_opendir_nofollow(FsContext *fs_ctx, const char *path);
#endif


@@ -25,13 +25,7 @@
static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
char *buffer;
ssize_t ret;
buffer = rpath(ctx, path);
ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size);
g_free(buffer);
return ret;
return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size);
}
static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
@@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
char *buffer;
int ret;
buffer = rpath(ctx, path);
ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags);
g_free(buffer);
return ret;
return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size,
flags);
}
static int mp_pacl_removexattr(FsContext *ctx,
const char *path, const char *name)
{
int ret;
char *buffer;
buffer = rpath(ctx, path);
ret = lremovexattr(buffer, MAP_ACL_ACCESS);
ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS);
if (ret == -1 && errno == ENODATA) {
/*
* We don't get ENODATA error when trying to remove a
@@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx,
errno = 0;
ret = 0;
}
g_free(buffer);
return ret;
}
static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
char *buffer;
ssize_t ret;
buffer = rpath(ctx, path);
ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size);
g_free(buffer);
return ret;
return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size);
}
static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
@@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
char *buffer;
int ret;
buffer = rpath(ctx, path);
ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags);
g_free(buffer);
return ret;
return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size,
flags);
}
static int mp_dacl_removexattr(FsContext *ctx,
const char *path, const char *name)
{
int ret;
char *buffer;
buffer = rpath(ctx, path);
ret = lremovexattr(buffer, MAP_ACL_DEFAULT);
ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT);
if (ret == -1 && errno == ENODATA) {
/*
* We don't get ENODATA error when trying to remove a
@@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx,
errno = 0;
ret = 0;
}
g_free(buffer);
return ret;
}

hw/9pfs/9p-util.c (new file)

@@ -0,0 +1,69 @@
/*
* 9p utilities
*
* Copyright IBM, Corp. 2017
*
* Authors:
* Greg Kurz <groug@kaod.org>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/xattr.h"
#include "9p-util.h"
int relative_openat_nofollow(int dirfd, const char *path, int flags,
mode_t mode)
{
int fd;
fd = dup(dirfd);
if (fd == -1) {
return -1;
}
while (*path) {
const char *c;
int next_fd;
char *head;
/* Only relative paths without consecutive slashes */
assert(path[0] != '/');
head = g_strdup(path);
c = strchr(path, '/');
if (c) {
head[c - path] = 0;
next_fd = openat_dir(fd, head);
} else {
next_fd = openat_file(fd, head, flags, mode);
}
g_free(head);
if (next_fd == -1) {
close_preserve_errno(fd);
return -1;
}
close(fd);
fd = next_fd;
if (!c) {
break;
}
path = c + 1;
}
return fd;
}
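/*
 * Example (hypothetical): open "a/b/file" relative to dirfd one component at
 * a time; the final component is opened with O_NOFOLLOW, so the call fails if
 * it is a symbolic link:
 *
 *     int fd = relative_openat_nofollow(dirfd, "a/b/file", O_RDONLY, 0);
 */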
ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
void *value, size_t size)
{
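    /* There is no fgetxattrat() syscall; emulate it by addressing the file
     * through /proc/self/fd/<dirfd>/<filename> and using lgetxattr(), so that
     * a symlink as the final component is not followed. */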
char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
int ret;
ret = lgetxattr(proc_path, name, value, size);
g_free(proc_path);
return ret;
}

hw/9pfs/9p-util.h (new file)

@@ -0,0 +1,54 @@
/*
* 9p utilities
*
* Copyright IBM, Corp. 2017
*
* Authors:
* Greg Kurz <groug@kaod.org>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_9P_UTIL_H
#define QEMU_9P_UTIL_H
static inline void close_preserve_errno(int fd)
{
int serrno = errno;
close(fd);
errno = serrno;
}
static inline int openat_dir(int dirfd, const char *name)
{
return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH);
}
static inline int openat_file(int dirfd, const char *name, int flags,
mode_t mode)
{
int fd, serrno, ret;
fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
mode);
if (fd == -1) {
return -1;
}
serrno = errno;
/* O_NONBLOCK was only needed to open the file. Let's drop it. */
ret = fcntl(fd, F_SETFL, flags);
assert(!ret);
errno = serrno;
return fd;
}
int relative_openat_nofollow(int dirfd, const char *path, int flags,
mode_t mode);
ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name,
void *value, size_t size);
int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
void *value, size_t size, int flags);
#endif


@@ -20,9 +20,6 @@
static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
char *buffer;
ssize_t ret;
if (strncmp(name, "user.virtfs.", 12) == 0) {
/*
* Don't allow fetch of user.virtfs namespace
@@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
errno = ENOATTR;
return -1;
}
buffer = rpath(ctx, path);
ret = lgetxattr(buffer, name, value, size);
g_free(buffer);
return ret;
return local_getxattr_nofollow(ctx, path, name, value, size);
}
static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
@@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
char *buffer;
int ret;
if (strncmp(name, "user.virtfs.", 12) == 0) {
/*
* Don't allow fetch of user.virtfs namespace
@@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
errno = EACCES;
return -1;
}
buffer = rpath(ctx, path);
ret = lsetxattr(buffer, name, value, size, flags);
g_free(buffer);
return ret;
return local_setxattr_nofollow(ctx, path, name, value, size, flags);
}
static int mp_user_removexattr(FsContext *ctx,
const char *path, const char *name)
{
char *buffer;
int ret;
if (strncmp(name, "user.virtfs.", 12) == 0) {
/*
* Don't allow fetch of user.virtfs namespace
@@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx,
errno = EACCES;
return -1;
}
buffer = rpath(ctx, path);
ret = lremovexattr(buffer, name);
g_free(buffer);
return ret;
return local_removexattr_nofollow(ctx, path, name);
}
XattrOperations mapped_user_xattr = {


@@ -15,6 +15,8 @@
#include "9p.h"
#include "fsdev/file-op-9p.h"
#include "9p-xattr.h"
#include "9p-util.h"
#include "9p-local.h"
static XattrOperations *get_xattr_operations(XattrOperations **h,
@@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path,
return name_size;
}
static ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
char *list, size_t size)
{
char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
int ret;
ret = llistxattr(proc_path, list, size);
g_free(proc_path);
return ret;
}
/*
* Get the list and pass to each layer to find out whether
@@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path,
void *value, size_t vsize)
{
ssize_t size = 0;
char *buffer;
void *ovalue = value;
XattrOperations *xops;
char *orig_value, *orig_value_start;
ssize_t xattr_len, parsed_len = 0, attr_len;
char *dirpath, *name;
int dirfd;
/* Get the actual len */
buffer = rpath(ctx, path);
xattr_len = llistxattr(buffer, value, 0);
dirpath = g_path_get_dirname(path);
dirfd = local_opendir_nofollow(ctx, dirpath);
g_free(dirpath);
if (dirfd == -1) {
return -1;
}
name = g_path_get_basename(path);
xattr_len = flistxattrat_nofollow(dirfd, name, value, 0);
if (xattr_len <= 0) {
g_free(buffer);
g_free(name);
close_preserve_errno(dirfd);
return xattr_len;
}
/* Now fetch the xattr and find the actual size */
orig_value = g_malloc(xattr_len);
xattr_len = llistxattr(buffer, orig_value, xattr_len);
g_free(buffer);
xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len);
g_free(name);
close_preserve_errno(dirfd);
if (xattr_len < 0) {
return -1;
}
/* store the orig pointer */
orig_value_start = orig_value;
@@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx,
}
ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
char *dirpath = g_path_get_dirname(path);
char *filename = g_path_get_basename(path);
int dirfd;
ssize_t ret = -1;
dirfd = local_opendir_nofollow(ctx, dirpath);
if (dirfd == -1) {
goto out;
}
ret = fgetxattrat_nofollow(dirfd, filename, name, value, size);
close_preserve_errno(dirfd);
out:
g_free(dirpath);
g_free(filename);
return ret;
}
ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size)
{
return local_getxattr_nofollow(ctx, path, name, value, size);
}
int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
void *value, size_t size, int flags)
{
char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
int ret;
ret = lsetxattr(proc_path, name, value, size, flags);
g_free(proc_path);
return ret;
}
ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
const char *name, void *value, size_t size,
int flags)
{
char *dirpath = g_path_get_dirname(path);
char *filename = g_path_get_basename(path);
int dirfd;
ssize_t ret = -1;
dirfd = local_opendir_nofollow(ctx, dirpath);
if (dirfd == -1) {
goto out;
}
ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags);
close_preserve_errno(dirfd);
out:
g_free(dirpath);
g_free(filename);
return ret;
}
int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
size_t size, int flags)
{
return local_setxattr_nofollow(ctx, path, name, value, size, flags);
}
static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
const char *name)
{
char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
int ret;
ret = lremovexattr(proc_path, name);
g_free(proc_path);
return ret;
}
ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
const char *name)
{
char *dirpath = g_path_get_dirname(path);
char *filename = g_path_get_basename(path);
int dirfd;
ssize_t ret = -1;
dirfd = local_opendir_nofollow(ctx, dirpath);
if (dirfd == -1) {
goto out;
}
ret = fremovexattrat_nofollow(dirfd, filename, name);
close_preserve_errno(dirfd);
out:
g_free(dirpath);
g_free(filename);
return ret;
}
int pt_removexattr(FsContext *ctx, const char *path, const char *name)
{
return local_removexattr_nofollow(ctx, path, name);
}
ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size)
{
errno = ENOTSUP;
return -1;
}
int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
errno = ENOTSUP;
return -1;
}
ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
void *value, size_t size)
{
return 0;
}
int notsup_removexattr(FsContext *ctx, const char *path, const char *name)
{
errno = ENOTSUP;
return -1;
}
XattrOperations *mapped_xattr_ops[] = {
&mapped_user_xattr,
&mapped_pacl_xattr,


@@ -29,6 +29,13 @@ typedef struct xattr_operations
const char *path, const char *name);
} XattrOperations;
ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
const char *name, void *value, size_t size);
ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
const char *name, void *value, size_t size,
int flags);
ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
const char *name);
extern XattrOperations mapped_user_xattr;
extern XattrOperations passthrough_user_xattr;
@@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value,
int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags);
int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name);
ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value,
size_t size);
ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size);
int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
size_t size, int flags);
int pt_removexattr(FsContext *ctx, const char *path, const char *name);
static inline ssize_t pt_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
char *buffer;
ssize_t ret;
buffer = rpath(ctx, path);
ret = lgetxattr(buffer, name, value, size);
g_free(buffer);
return ret;
}
static inline int pt_setxattr(FsContext *ctx, const char *path,
const char *name, void *value,
size_t size, int flags)
{
char *buffer;
int ret;
buffer = rpath(ctx, path);
ret = lsetxattr(buffer, name, value, size, flags);
g_free(buffer);
return ret;
}
static inline int pt_removexattr(FsContext *ctx,
const char *path, const char *name)
{
char *buffer;
int ret;
buffer = rpath(ctx, path);
ret = lremovexattr(path, name);
g_free(buffer);
return ret;
}
static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path,
const char *name, void *value,
size_t size)
{
errno = ENOTSUP;
return -1;
}
static inline int notsup_setxattr(FsContext *ctx, const char *path,
const char *name, void *value,
size_t size, int flags)
{
errno = ENOTSUP;
return -1;
}
static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path,
char *name, void *value, size_t size)
{
return 0;
}
static inline int notsup_removexattr(FsContext *ctx,
const char *path, const char *name)
{
errno = ENOTSUP;
return -1;
}
ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size);
int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags);
ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
void *value, size_t size);
int notsup_removexattr(FsContext *ctx, const char *path, const char *name);
#endif


@@ -1,4 +1,4 @@
common-obj-y = 9p.o
common-obj-y = 9p.o 9p-util.o
common-obj-y += 9p-local.o 9p-xattr.o
common-obj-y += 9p-xattr-user.o 9p-posix-acl.o
common-obj-y += coth.o cofs.o codir.o cofile.o


@@ -49,7 +49,6 @@
#define ACPI_PCIHP_ADDR 0xae00
#define ACPI_PCIHP_SIZE 0x0014
#define ACPI_PCIHP_LEGACY_SIZE 0x000f
#define PCI_UP_BASE 0x0000
#define PCI_DOWN_BASE 0x0004
#define PCI_EJ_BASE 0x0008
@@ -302,16 +301,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
s->root = root_bus;
s->legacy_piix = !bridges_enabled;
if (s->legacy_piix) {
unsigned *bus_bsel = g_malloc(sizeof *bus_bsel);
s->io_len = ACPI_PCIHP_LEGACY_SIZE;
*bus_bsel = ACPI_PCIHP_BSEL_DEFAULT;
object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL,
bus_bsel, NULL);
}
memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
"acpi-pci-hotplug", s->io_len);
memory_region_add_subregion(address_space_io, s->io_base, &s->io);


@@ -440,6 +440,8 @@ static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
{
PIIX4PMState *s = opaque;
/* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive
* and it's not hot-unpluggable */
qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort);
}


@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
#include "hw/arm/armv7m.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
@@ -17,148 +18,261 @@
#include "elf.h"
#include "sysemu/qtest.h"
#include "qemu/error-report.h"
#include "exec/address-spaces.h"
/* Bitbanded IO. Each word corresponds to a single bit. */
/* Get the byte address of the real memory for a bitband access. */
static inline uint32_t bitband_addr(void * opaque, uint32_t addr)
static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset)
{
uint32_t res;
res = *(uint32_t *)opaque;
res |= (addr & 0x1ffffff) >> 5;
return res;
return s->base | (offset & 0x1ffffff) >> 5;
}
static uint32_t bitband_readb(void *opaque, hwaddr offset)
static MemTxResult bitband_read(void *opaque, hwaddr offset,
uint64_t *data, unsigned size, MemTxAttrs attrs)
{
uint8_t v;
cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1);
return (v & (1 << ((offset >> 2) & 7))) != 0;
BitBandState *s = opaque;
uint8_t buf[4];
MemTxResult res;
int bitpos, bit;
hwaddr addr;
assert(size <= 4);
/* Find address in underlying memory and round down to multiple of size */
addr = bitband_addr(s, offset) & (-size);
res = address_space_read(s->source_as, addr, attrs, buf, size);
if (res) {
return res;
}
/* Bit position in the N bytes read... */
bitpos = (offset >> 2) & ((size * 8) - 1);
/* ...converted to byte in buffer and bit in byte */
bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1;
*data = bit;
return MEMTX_OK;
}
static void bitband_writeb(void *opaque, hwaddr offset,
uint32_t value)
static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size, MemTxAttrs attrs)
{
uint32_t addr;
uint8_t mask;
uint8_t v;
addr = bitband_addr(opaque, offset);
mask = (1 << ((offset >> 2) & 7));
cpu_physical_memory_read(addr, &v, 1);
if (value & 1)
v |= mask;
else
v &= ~mask;
cpu_physical_memory_write(addr, &v, 1);
}
BitBandState *s = opaque;
uint8_t buf[4];
MemTxResult res;
int bitpos, bit;
hwaddr addr;
static uint32_t bitband_readw(void *opaque, hwaddr offset)
{
uint32_t addr;
uint16_t mask;
uint16_t v;
addr = bitband_addr(opaque, offset) & ~1;
mask = (1 << ((offset >> 2) & 15));
mask = tswap16(mask);
cpu_physical_memory_read(addr, &v, 2);
return (v & mask) != 0;
}
assert(size <= 4);
static void bitband_writew(void *opaque, hwaddr offset,
uint32_t value)
{
uint32_t addr;
uint16_t mask;
uint16_t v;
addr = bitband_addr(opaque, offset) & ~1;
mask = (1 << ((offset >> 2) & 15));
mask = tswap16(mask);
cpu_physical_memory_read(addr, &v, 2);
if (value & 1)
v |= mask;
else
v &= ~mask;
cpu_physical_memory_write(addr, &v, 2);
}
static uint32_t bitband_readl(void *opaque, hwaddr offset)
{
uint32_t addr;
uint32_t mask;
uint32_t v;
addr = bitband_addr(opaque, offset) & ~3;
mask = (1 << ((offset >> 2) & 31));
mask = tswap32(mask);
cpu_physical_memory_read(addr, &v, 4);
return (v & mask) != 0;
}
static void bitband_writel(void *opaque, hwaddr offset,
uint32_t value)
{
uint32_t addr;
uint32_t mask;
uint32_t v;
addr = bitband_addr(opaque, offset) & ~3;
mask = (1 << ((offset >> 2) & 31));
mask = tswap32(mask);
cpu_physical_memory_read(addr, &v, 4);
if (value & 1)
v |= mask;
else
v &= ~mask;
cpu_physical_memory_write(addr, &v, 4);
/* Find address in underlying memory and round down to multiple of size */
addr = bitband_addr(s, offset) & (-size);
res = address_space_read(s->source_as, addr, attrs, buf, size);
if (res) {
return res;
}
/* Bit position in the N bytes read... */
bitpos = (offset >> 2) & ((size * 8) - 1);
/* ...converted to byte in buffer and bit in byte */
bit = 1 << (bitpos & 7);
if (value & 1) {
buf[bitpos >> 3] |= bit;
} else {
buf[bitpos >> 3] &= ~bit;
}
return address_space_write(s->source_as, addr, attrs, buf, size);
}
static const MemoryRegionOps bitband_ops = {
.old_mmio = {
.read = { bitband_readb, bitband_readw, bitband_readl, },
.write = { bitband_writeb, bitband_writew, bitband_writel, },
},
.read_with_attrs = bitband_read,
.write_with_attrs = bitband_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.impl.min_access_size = 1,
.impl.max_access_size = 4,
.valid.min_access_size = 1,
.valid.max_access_size = 4,
};
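/* Worked example of the mapping implemented above: with s->base = 0x20000000
 * and the alias region mapped at 0x22000000, a one-byte access at alias offset
 *     offset = byte_offset * 32 + bit_number * 4
 * targets bit 'bit_number' of the byte at 0x20000000 + byte_offset; e.g.
 * offset 0x24 (1 * 32 + 1 * 4) accesses bit 1 of the byte at 0x20000001.
 */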
#define TYPE_BITBAND "ARM,bitband-memory"
#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND)
typedef struct {
/*< private >*/
SysBusDevice parent_obj;
/*< public >*/
MemoryRegion iomem;
uint32_t base;
} BitBandState;
static void bitband_init(Object *obj)
{
BitBandState *s = BITBAND(obj);
SysBusDevice *dev = SYS_BUS_DEVICE(obj);
memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base,
object_property_add_link(obj, "source-memory",
TYPE_MEMORY_REGION,
(Object **)&s->source_memory,
qdev_prop_allow_set_link_before_realize,
OBJ_PROP_LINK_UNREF_ON_RELEASE,
&error_abort);
memory_region_init_io(&s->iomem, obj, &bitband_ops, s,
"bitband", 0x02000000);
sysbus_init_mmio(dev, &s->iomem);
}
static void armv7m_bitband_init(void)
static void bitband_realize(DeviceState *dev, Error **errp)
{
DeviceState *dev;
BitBandState *s = BITBAND(dev);
dev = qdev_create(NULL, TYPE_BITBAND);
qdev_prop_set_uint32(dev, "base", 0x20000000);
qdev_init_nofail(dev);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000);
if (!s->source_memory) {
error_setg(errp, "source-memory property not set");
return;
}
dev = qdev_create(NULL, TYPE_BITBAND);
qdev_prop_set_uint32(dev, "base", 0x40000000);
qdev_init_nofail(dev);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000);
s->source_as = address_space_init_shareable(s->source_memory,
"bitband-source");
}
/* Board init. */
static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = {
0x20000000, 0x40000000
};
static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = {
0x22000000, 0x42000000
};
static void armv7m_instance_init(Object *obj)
{
ARMv7MState *s = ARMV7M(obj);
int i;
/* Can't init the cpu here, we don't yet know which model to use */
object_property_add_link(obj, "memory",
TYPE_MEMORY_REGION,
(Object **)&s->board_memory,
qdev_prop_allow_set_link_before_realize,
OBJ_PROP_LINK_UNREF_ON_RELEASE,
&error_abort);
memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX);
object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic");
qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default());
object_property_add_alias(obj, "num-irq",
OBJECT(&s->nvic), "num-irq", &error_abort);
for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND);
qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default());
}
}
static void armv7m_realize(DeviceState *dev, Error **errp)
{
ARMv7MState *s = ARMV7M(dev);
SysBusDevice *sbd;
Error *err = NULL;
int i;
char **cpustr;
ObjectClass *oc;
const char *typename;
CPUClass *cc;
if (!s->board_memory) {
error_setg(errp, "memory property was not set");
return;
}
memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1);
cpustr = g_strsplit(s->cpu_model, ",", 2);
oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
if (!oc) {
error_setg(errp, "Unknown CPU model %s", cpustr[0]);
g_strfreev(cpustr);
return;
}
cc = CPU_CLASS(oc);
typename = object_class_get_name(oc);
cc->parse_features(typename, cpustr[1], &err);
g_strfreev(cpustr);
if (err) {
error_propagate(errp, err);
return;
}
s->cpu = ARM_CPU(object_new(typename));
if (!s->cpu) {
error_setg(errp, "Unknown CPU model %s", s->cpu_model);
return;
}
object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory",
&error_abort);
object_property_set_bool(OBJECT(s->cpu), true, "realized", &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
/* Note that we must realize the NVIC after the CPU */
object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
/* Alias the NVIC's input and output GPIOs as our own so the board
* code can wire them up. (We do this in realize because the
* NVIC doesn't create the input GPIO array until realize.)
*/
qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL);
qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ");
/* Wire the NVIC up to the CPU */
sbd = SYS_BUS_DEVICE(&s->nvic);
sysbus_connect_irq(sbd, 0,
qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
s->cpu->env.nvic = &s->nvic;
memory_region_add_subregion(&s->container, 0xe000e000,
sysbus_mmio_get_region(sbd, 0));
for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
Object *obj = OBJECT(&s->bitband[i]);
SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]);
object_property_set_int(obj, bitband_input_addr[i], "base", &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
object_property_set_link(obj, OBJECT(s->board_memory),
"source-memory", &error_abort);
object_property_set_bool(obj, true, "realized", &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
memory_region_add_subregion(&s->container, bitband_output_addr[i],
sysbus_mmio_get_region(sbd, 0));
}
}
static Property armv7m_properties[] = {
DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model),
DEFINE_PROP_END_OF_LIST(),
};
static void armv7m_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = armv7m_realize;
dc->props = armv7m_properties;
}
static const TypeInfo armv7m_info = {
.name = TYPE_ARMV7M,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ARMv7MState),
.instance_init = armv7m_instance_init,
.class_init = armv7m_class_init,
};
static void armv7m_reset(void *opaque)
{
ARMCPU *cpu = opaque;
@@ -168,38 +282,36 @@ static void armv7m_reset(void *opaque)
/* Init CPU and memory for a v7-M based board.
mem_size is in bytes.
Returns the NVIC array. */
Returns the ARMv7M device. */
DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
const char *kernel_filename, const char *cpu_model)
{
ARMCPU *cpu;
CPUARMState *env;
DeviceState *nvic;
DeviceState *armv7m;
if (cpu_model == NULL) {
cpu_model = "cortex-m3";
}
armv7m = qdev_create(NULL, "armv7m");
qdev_prop_set_uint32(armv7m, "num-irq", num_irq);
qdev_prop_set_string(armv7m, "cpu-model", cpu_model);
object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()),
"memory", &error_abort);
/* This will exit with an error if the user passed us a bad cpu_model */
qdev_init_nofail(armv7m);
armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size);
return armv7m;
}
void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
{
int image_size;
uint64_t entry;
uint64_t lowaddr;
int big_endian;
if (cpu_model == NULL) {
cpu_model = "cortex-m3";
}
cpu = cpu_arm_init(cpu_model);
if (cpu == NULL) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
env = &cpu->env;
armv7m_bitband_init();
nvic = qdev_create(NULL, "armv7m_nvic");
qdev_prop_set_uint32(nvic, "num-irq", num_irq);
env->nvic = nvic;
qdev_init_nofail(nvic);
sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0,
qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ));
#ifdef TARGET_WORDS_BIGENDIAN
big_endian = 1;
#else
@@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
}
}
/* CPU objects (unlike devices) are not automatically reset on system
* reset, so we must always register a handler to do so. Unlike
* A-profile CPUs, we don't need to do anything special in the
* handler to arrange that it starts correctly.
* This is arguably the wrong place to do this, but it matches the
* way A-profile does it. Note that this means that every M profile
* board must call this function!
*/
qemu_register_reset(armv7m_reset, cpu);
return nvic;
}
static Property bitband_properties[] = {
@@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = bitband_realize;
dc->props = bitband_properties;
}
@@ -251,6 +371,7 @@ static const TypeInfo bitband_info = {
static void armv7m_register_types(void)
{
type_register_static(&bitband_info);
type_register_static(&armv7m_info);
}
type_init(armv7m_register_types)


@@ -96,6 +96,11 @@ static void bcm2835_peripherals_init(Object *obj)
object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL);
qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default());
/* SDHOST */
object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST);
object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL);
qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default());
/* DMA Channels */
object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA);
object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL);
@@ -103,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj)
object_property_add_const_link(OBJECT(&s->dma), "dma-mr",
OBJECT(&s->gpu_bus_mr), &error_abort);
/* GPIO */
object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO);
object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL);
qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default());
object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci",
OBJECT(&s->sdhci.sdbus), &error_abort);
object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost",
OBJECT(&s->sdhost.sdbus), &error_abort);
}
static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -267,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0,
qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
INTERRUPT_ARASANSDIO));
object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus",
&err);
/* SDHOST */
object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET,
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0));
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0,
qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
INTERRUPT_SDIO));
/* DMA Channels */
object_property_set_bool(OBJECT(&s->dma), true, "realized", &err);
if (err) {
@@ -292,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
BCM2835_IC_GPU_IRQ,
INTERRUPT_DMA0 + n));
}
/* GPIO */
object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET,
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0));
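/* Expose the GPIO controller's "sd-bus" (rather than the SDHCI's) as the
* SoC-level alias: the GPIO block reparents the SD card between the SDHCI
* and SDHost controllers according to the guest's pin function selection.
*/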
object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus",
&err);
if (err) {
error_propagate(errp, err);
return;
}
}
static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data)

View File

@@ -27,17 +27,18 @@
#include "hw/boards.h"
#include "qemu/error-report.h"
#include "hw/arm/stm32f205_soc.h"
#include "hw/arm/arm.h"
static void netduino2_init(MachineState *machine)
{
DeviceState *dev;
dev = qdev_create(NULL, TYPE_STM32F205_SOC);
if (machine->kernel_filename) {
qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename);
}
qdev_prop_set_string(dev, "cpu-model", "cortex-m3");
object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal);
armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
FLASH_SIZE);
}
static void netduino2_machine_init(MachineClass *mc)

View File

@@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj)
STM32F205State *s = STM32F205_SOC(obj);
int i;
object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M);
qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default());
object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG);
qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default());
@@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj)
static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
{
STM32F205State *s = STM32F205_SOC(dev_soc);
DeviceState *dev, *nvic;
DeviceState *dev, *armv7m;
SysBusDevice *busdev;
Error *err = NULL;
int i;
@@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
vmstate_register_ram_global(sram);
memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram);
nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
s->kernel_filename, s->cpu_model);
armv7m = DEVICE(&s->armv7m);
qdev_prop_set_uint32(armv7m, "num-irq", 96);
qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model);
object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()),
"memory", &error_abort);
object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
/* System configuration controller */
dev = DEVICE(&s->syscfg);
@@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, 0x40013800);
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71));
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71));
/* Attach UART (uses USART registers) and USART controllers */
for (i = 0; i < STM_NUM_USARTS; i++) {
@@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, usart_addr[i]);
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i]));
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i]));
}
/* Timer 2 to 5 */
@@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, timer_addr[i]);
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i]));
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i]));
}
/* ADC 1 to 3 */
@@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
return;
}
qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0,
qdev_get_gpio_in(nvic, ADC_IRQ));
qdev_get_gpio_in(armv7m, ADC_IRQ));
for (i = 0; i < STM_NUM_ADCS; i++) {
dev = DEVICE(&(s->adc[i]));
@@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, spi_addr[i]);
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i]));
sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i]));
}
}
static Property stm32f205_soc_properties[] = {
DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename),
DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model),
DEFINE_PROP_END_OF_LIST(),
};

View File

@@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf)
}
}
void blkconf_apply_backend_options(BlockConf *conf)
void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
bool resizable, Error **errp)
{
BlockBackend *blk = conf->blk;
BlockdevOnError rerror, werror;
uint64_t perm, shared_perm;
bool wce;
int ret;
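/* perm is what this device itself needs on the image; shared_perm is what
* it will tolerate other users of the same image requesting concurrently.
*/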
perm = BLK_PERM_CONSISTENT_READ;
if (!readonly) {
perm |= BLK_PERM_WRITE;
}
shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_GRAPH_MOD;
if (resizable) {
shared_perm |= BLK_PERM_RESIZE;
}
if (conf->share_rw) {
shared_perm |= BLK_PERM_WRITE;
}
ret = blk_set_perm(blk, perm, shared_perm, errp);
if (ret < 0) {
return;
}
switch (conf->wce) {
case ON_OFF_AUTO_ON: wce = true; break;

View File

@@ -186,6 +186,7 @@ typedef enum FDiskFlags {
struct FDrive {
FDCtrl *fdctrl;
BlockBackend *blk;
BlockConf *conf;
/* Drive status */
FloppyDriveType drive; /* CMOS drive type */
uint8_t perpendicular; /* 2.88 MB access mode */
@@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv)
}
}
static void fd_change_cb(void *opaque, bool load)
static void fd_change_cb(void *opaque, bool load, Error **errp)
{
FDrive *drive = opaque;
Error *local_err = NULL;
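/* On eject, give up all permissions on the now-empty backend; on insert,
* re-acquire them by re-applying the configured backend options.
*/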
if (!load) {
blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort);
} else {
blkconf_apply_backend_options(drive->conf,
blk_is_read_only(drive->blk), false,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
drive->media_changed = 1;
drive->media_validated = false;
@@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev)
FloppyDrive *dev = FLOPPY_DRIVE(qdev);
FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus);
FDrive *drive;
Error *local_err = NULL;
int ret;
if (dev->unit == -1) {
@@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev)
if (!dev->conf.blk) {
/* Anonymous BlockBackend for an empty drive */
dev->conf.blk = blk_new();
dev->conf.blk = blk_new(0, BLK_PERM_ALL);
ret = blk_attach_dev(dev->conf.blk, qdev);
assert(ret == 0);
}
@@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev)
* blkconf_apply_backend_options(). */
dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
blkconf_apply_backend_options(&dev->conf);
blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk),
false, &local_err);
if (local_err) {
error_report_err(local_err);
return -1;
}
/* 'enospc' is the default for -drive, 'report' is what blk_new() gives us
* for empty drives. */
@@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev)
return -1;
}
drive->conf = &dev->conf;
drive->blk = dev->conf.blk;
drive->fdctrl = bus->fdc;

View File

@@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
{
Flash *s = M25P80(ss);
M25P80Class *mc = M25P80_GET_CLASS(s);
int ret;
s->pi = mc->pi;
@@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
s->dirty_page = -1;
if (s->blk) {
uint64_t perm = BLK_PERM_CONSISTENT_READ |
(blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE);
ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp);
if (ret < 0) {
return;
}
DB_PRINT_L(0, "Binding to IF_MTD drive\n");
s->storage = blk_blockalign(s->blk, s->size);

View File

@@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp)
{
int pagesize;
NANDFlashState *s = NAND(dev);
int ret;
s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
s->size = nand_flash_ids[s->chip_id].size << 20;
@@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp)
error_setg(errp, "Can't use a read-only drive");
return;
}
ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
BLK_PERM_ALL, errp);
if (ret < 0) {
return;
}
if (blk_getlength(s->blk) >=
(s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
pagesize = 0;

View File

@@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev)
int i;
int64_t bs_size;
uint8_t *pci_conf;
Error *local_err = NULL;
if (!n->conf.blk) {
return -1;
@@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev)
return -1;
}
blkconf_blocksizes(&n->conf);
blkconf_apply_backend_options(&n->conf);
blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
false, &local_err);
if (local_err) {
error_report_err(local_err);
return -1;
}
pci_conf = pci_dev->config;
pci_conf[PCI_INTERRUPT_PIN] = 1;

View File

@@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd)
OneNANDState *s = ONE_NAND(dev);
uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7));
void *ram;
Error *local_err = NULL;
s->base = (hwaddr)-1;
s->rdy = NULL;
@@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd)
error_report("Can't use a read-only drive");
return -1;
}
blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
BLK_PERM_ALL, &local_err);
if (local_err) {
error_report_err(local_err);
return -1;
}
s->blk_cur = s->blk;
}
s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT),

View File

@@ -757,6 +757,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
pfl->storage = memory_region_get_ram_ptr(&pfl->mem);
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
if (pfl->blk) {
uint64_t perm;
pfl->ro = blk_is_read_only(pfl->blk);
perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
if (ret < 0) {
return;
}
} else {
pfl->ro = 0;
}
if (pfl->blk) {
/* read the initial flash content */
ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
@@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
}
}
if (pfl->blk) {
pfl->ro = blk_is_read_only(pfl->blk);
} else {
pfl->ro = 0;
}
/* Default to devices being used at their maximum device width. This was
* assumed before the device_width support was added.
*/

View File

@@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl));
pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem);
pfl->chip_len = chip_len;
if (pfl->blk) {
uint64_t perm;
pfl->ro = blk_is_read_only(pfl->blk);
perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
if (ret < 0) {
return;
}
} else {
pfl->ro = 0;
}
if (pfl->blk) {
/* read the initial flash content */
ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);
@@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
pfl->rom_mode = 1;
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
if (pfl->blk) {
pfl->ro = blk_is_read_only(pfl->blk);
} else {
pfl->ro = 0;
}
pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl);
pfl->wcycle = 0;
pfl->cmd = 0;

View File

@@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
}
blkconf_serial(&conf->conf, &conf->serial);
blkconf_apply_backend_options(&conf->conf);
blkconf_apply_backend_options(&conf->conf,
blk_is_read_only(conf->conf.blk), true,
&err);
if (err) {
error_propagate(errp, err);
return;
}
s->original_wce = blk_enable_write_cache(conf->conf.blk);
blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
if (err) {

View File

@@ -197,7 +197,7 @@ static void qbus_initfn(Object *obj)
TYPE_HOTPLUG_HANDLER,
(Object **)&bus->hotplug_handler,
object_property_allow_set_link,
OBJ_PROP_LINK_UNREF_ON_RELEASE,
0,
NULL);
object_property_add_bool(obj, "realized",
bus_get_realized, bus_set_realized, NULL);

View File

@@ -434,6 +434,19 @@ int load_elf_as(const char *filename,
void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
uint64_t *highaddr, int big_endian, int elf_machine,
int clear_lsb, int data_swab, AddressSpace *as)
{
return load_elf_ram(filename, translate_fn, translate_opaque,
pentry, lowaddr, highaddr, big_endian, elf_machine,
clear_lsb, data_swab, as, true);
}
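/* load_elf_ram() additionally takes a load_rom flag: when it is false the
* image is written directly into the destination memory instead of being
* registered as a ROM blob, so it is not re-installed on system reset.
*/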
/* return < 0 if error, otherwise the number of bytes loaded in memory */
int load_elf_ram(const char *filename,
uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
uint64_t *highaddr, int big_endian, int elf_machine,
int clear_lsb, int data_swab, AddressSpace *as,
bool load_rom)
{
int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED;
uint8_t e_ident[EI_NIDENT];
@@ -473,11 +486,11 @@ int load_elf_as(const char *filename,
if (e_ident[EI_CLASS] == ELFCLASS64) {
ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb,
data_swab, as);
data_swab, as, load_rom);
} else {
ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb,
data_swab, as);
data_swab, as, load_rom);
}
fail:

View File

@@ -12,6 +12,7 @@
#include "qemu/host-utils.h"
#include "sysemu/replay.h"
#include "sysemu/qtest.h"
#include "block/aio.h"
#define DELTA_ADJUST 1
#define DELTA_NO_ADJUST -1
@@ -353,3 +354,10 @@ ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask)
s->policy_mask = policy_mask;
return s;
}
void ptimer_free(ptimer_state *s)
{
qemu_bh_delete(s->bh);
timer_free(s->timer);
g_free(s);
}

View File

@@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr,
{
BlockBackend *blk;
bool blk_created = false;
int ret;
blk = blk_by_name(str);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
if (bs) {
blk = blk_new();
blk_insert_bs(blk, bs);
blk = blk_new(0, BLK_PERM_ALL);
blk_created = true;
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
goto fail;
}
}
}
if (!blk) {

View File

@@ -37,6 +37,7 @@
#include "hw/boards.h"
#include "hw/sysbus.h"
#include "qapi-event.h"
#include "migration/migration.h"
int qdev_hotplug = 0;
static bool qdev_hot_added = false;
@@ -102,9 +103,23 @@ static void bus_add_child(BusState *bus, DeviceState *child)
void qdev_set_parent_bus(DeviceState *dev, BusState *bus)
{
bool replugging = dev->parent_bus != NULL;
if (replugging) {
/* Keep a reference to the device while it's not plugged into
* any bus, to avoid it potentially evaporating when it is
* dereffed in bus_remove_child().
*/
object_ref(OBJECT(dev));
bus_remove_child(dev->parent_bus, dev);
object_unref(OBJECT(dev->parent_bus));
}
dev->parent_bus = bus;
object_ref(OBJECT(bus));
bus_add_child(bus, dev);
if (replugging) {
object_unref(OBJECT(dev));
}
}
/* Create a new device. This only initializes the device state
@@ -889,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
Error *local_err = NULL;
bool unattached_parent = false;
static int unattached_count;
int ret;
if (dev->hotplugged && !dc->hotpluggable) {
error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
@@ -896,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
}
if (value && !dev->realized) {
ret = check_migratable(obj, &local_err);
if (ret < 0) {
goto fail;
}
if (!obj->parent) {
gchar *name = g_strdup_printf("device[%d]", unattached_count++);

View File

@@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o
obj-$(CONFIG_OMAP) += omap_gpio.o
obj-$(CONFIG_IMX) += imx_gpio.o
obj-$(CONFIG_RASPI) += bcm2835_gpio.o

hw/gpio/bcm2835_gpio.c Normal file
View File

@@ -0,0 +1,353 @@
/*
* Raspberry Pi (BCM2835) GPIO Controller
*
* Copyright (c) 2017 Antfield SAS
*
* Authors:
* Clement Deschamps <clement.deschamps@antfield.fr>
* Luc Michel <luc.michel@antfield.fr>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/timer.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/sd/sd.h"
#include "hw/gpio/bcm2835_gpio.h"
#define GPFSEL0 0x00
#define GPFSEL1 0x04
#define GPFSEL2 0x08
#define GPFSEL3 0x0C
#define GPFSEL4 0x10
#define GPFSEL5 0x14
#define GPSET0 0x1C
#define GPSET1 0x20
#define GPCLR0 0x28
#define GPCLR1 0x2C
#define GPLEV0 0x34
#define GPLEV1 0x38
#define GPEDS0 0x40
#define GPEDS1 0x44
#define GPREN0 0x4C
#define GPREN1 0x50
#define GPFEN0 0x58
#define GPFEN1 0x5C
#define GPHEN0 0x64
#define GPHEN1 0x68
#define GPLEN0 0x70
#define GPLEN1 0x74
#define GPAREN0 0x7C
#define GPAREN1 0x80
#define GPAFEN0 0x88
#define GPAFEN1 0x8C
#define GPPUD 0x94
#define GPPUDCLK0 0x98
#define GPPUDCLK1 0x9C
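/* Each GPFSELn register packs the 3-bit function select for ten pins
* (pin 10*n + i lives in bits [3i+2:3i]): 0 is input, 1 is output and
* values 2-7 select the alternate functions. gpfsel_set() below watches
* pins 48-53 to reroute the SD card between the SDHCI and SDHost buses.
*/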
static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg)
{
int i;
uint32_t value = 0;
for (i = 0; i < 10; i++) {
uint32_t index = 10 * reg + i;
if (index < sizeof(s->fsel)) {
value |= (s->fsel[index] & 0x7) << (3 * i);
}
}
return value;
}
static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value)
{
int i;
for (i = 0; i < 10; i++) {
uint32_t index = 10 * reg + i;
if (index < sizeof(s->fsel)) {
int fsel = (value >> (3 * i)) & 0x7;
s->fsel[index] = fsel;
}
}
/* SD controller selection (48-53) */
if (s->sd_fsel != 0
&& (s->fsel[48] == 0) /* SD_CLK_R */
&& (s->fsel[49] == 0) /* SD_CMD_R */
&& (s->fsel[50] == 0) /* SD_DATA0_R */
&& (s->fsel[51] == 0) /* SD_DATA1_R */
&& (s->fsel[52] == 0) /* SD_DATA2_R */
&& (s->fsel[53] == 0) /* SD_DATA3_R */
) {
/* SDHCI controller selected */
sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci);
s->sd_fsel = 0;
} else if (s->sd_fsel != 4
&& (s->fsel[48] == 4) /* SD_CLK_R */
&& (s->fsel[49] == 4) /* SD_CMD_R */
&& (s->fsel[50] == 4) /* SD_DATA0_R */
&& (s->fsel[51] == 4) /* SD_DATA1_R */
&& (s->fsel[52] == 4) /* SD_DATA2_R */
&& (s->fsel[53] == 4) /* SD_DATA3_R */
) {
/* SDHost controller selected */
sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost);
s->sd_fsel = 4;
}
}
static int gpfsel_is_out(BCM2835GpioState *s, int index)
{
if (index >= 0 && index < 54) {
return s->fsel[index] == 1;
}
return 0;
}
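/* GPSET/GPCLR are write-1-to-set / write-1-to-clear: only bits that change
* the current level, and whose pin is configured as an output, toggle the
* corresponding qemu_irq output line.
*/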
static void gpset(BCM2835GpioState *s,
uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
{
uint32_t changes = val & ~*lev;
uint32_t cur = 1;
int i;
for (i = 0; i < count; i++) {
if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
qemu_set_irq(s->out[start + i], 1);
}
cur <<= 1;
}
*lev |= val;
}
static void gpclr(BCM2835GpioState *s,
uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
{
uint32_t changes = val & *lev;
uint32_t cur = 1;
int i;
for (i = 0; i < count; i++) {
if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
qemu_set_irq(s->out[start + i], 0);
}
cur <<= 1;
}
*lev &= ~val;
}
static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset,
unsigned size)
{
BCM2835GpioState *s = (BCM2835GpioState *)opaque;
switch (offset) {
case GPFSEL0:
case GPFSEL1:
case GPFSEL2:
case GPFSEL3:
case GPFSEL4:
case GPFSEL5:
return gpfsel_get(s, offset / 4);
case GPSET0:
case GPSET1:
/* Write Only */
return 0;
case GPCLR0:
case GPCLR1:
/* Write Only */
return 0;
case GPLEV0:
return s->lev0;
case GPLEV1:
return s->lev1;
case GPEDS0:
case GPEDS1:
case GPREN0:
case GPREN1:
case GPFEN0:
case GPFEN1:
case GPHEN0:
case GPHEN1:
case GPLEN0:
case GPLEN1:
case GPAREN0:
case GPAREN1:
case GPAFEN0:
case GPAFEN1:
case GPPUD:
case GPPUDCLK0:
case GPPUDCLK1:
/* Not implemented */
return 0;
default:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
break;
}
return 0;
}
static void bcm2835_gpio_write(void *opaque, hwaddr offset,
uint64_t value, unsigned size)
{
BCM2835GpioState *s = (BCM2835GpioState *)opaque;
switch (offset) {
case GPFSEL0:
case GPFSEL1:
case GPFSEL2:
case GPFSEL3:
case GPFSEL4:
case GPFSEL5:
gpfsel_set(s, offset / 4, value);
break;
case GPSET0:
gpset(s, value, 0, 32, &s->lev0);
break;
case GPSET1:
gpset(s, value, 32, 22, &s->lev1);
break;
case GPCLR0:
gpclr(s, value, 0, 32, &s->lev0);
break;
case GPCLR1:
gpclr(s, value, 32, 22, &s->lev1);
break;
case GPLEV0:
case GPLEV1:
/* Read Only */
break;
case GPEDS0:
case GPEDS1:
case GPREN0:
case GPREN1:
case GPFEN0:
case GPFEN1:
case GPHEN0:
case GPHEN1:
case GPLEN0:
case GPLEN1:
case GPAREN0:
case GPAREN1:
case GPAFEN0:
case GPAFEN1:
case GPPUD:
case GPPUDCLK0:
case GPPUDCLK1:
/* Not implemented */
break;
default:
goto err_out;
}
return;
err_out:
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
__func__, offset);
}
static void bcm2835_gpio_reset(DeviceState *dev)
{
BCM2835GpioState *s = BCM2835_GPIO(dev);
int i;
for (i = 0; i < 6; i++) {
gpfsel_set(s, i, 0);
}
s->sd_fsel = 0;
/* SDHCI is selected by default */
sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci);
s->lev0 = 0;
s->lev1 = 0;
}
static const MemoryRegionOps bcm2835_gpio_ops = {
.read = bcm2835_gpio_read,
.write = bcm2835_gpio_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
static const VMStateDescription vmstate_bcm2835_gpio = {
.name = "bcm2835_gpio",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54),
VMSTATE_UINT32(lev0, BCM2835GpioState),
VMSTATE_UINT32(lev1, BCM2835GpioState),
VMSTATE_UINT8(sd_fsel, BCM2835GpioState),
VMSTATE_END_OF_LIST()
}
};
static void bcm2835_gpio_init(Object *obj)
{
BCM2835GpioState *s = BCM2835_GPIO(obj);
DeviceState *dev = DEVICE(obj);
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
TYPE_SD_BUS, DEVICE(s), "sd-bus");
memory_region_init_io(&s->iomem, obj,
&bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000);
sysbus_init_mmio(sbd, &s->iomem);
qdev_init_gpio_out(dev, s->out, 54);
}
static void bcm2835_gpio_realize(DeviceState *dev, Error **errp)
{
BCM2835GpioState *s = BCM2835_GPIO(dev);
Object *obj;
Error *err = NULL;
obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err);
if (obj == NULL) {
error_setg(errp, "%s: required sdhci link not found: %s",
__func__, error_get_pretty(err));
return;
}
s->sdbus_sdhci = SD_BUS(obj);
obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err);
if (obj == NULL) {
error_setg(errp, "%s: required sdhost link not found: %s",
__func__, error_get_pretty(err));
return;
}
s->sdbus_sdhost = SD_BUS(obj);
}
static void bcm2835_gpio_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->vmsd = &vmstate_bcm2835_gpio;
dc->realize = &bcm2835_gpio_realize;
dc->reset = &bcm2835_gpio_reset;
}
static const TypeInfo bcm2835_gpio_info = {
.name = TYPE_BCM2835_GPIO,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(BCM2835GpioState),
.instance_init = bcm2835_gpio_init,
.class_init = bcm2835_gpio_class_init,
};
static void bcm2835_gpio_register_types(void)
{
type_register_static(&bcm2835_gpio_info);
}
type_init(bcm2835_gpio_register_types)

View File

@@ -462,7 +462,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
*bus_bsel = (*bsel_alloc)++;
object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL,
bus_bsel, NULL);
bus_bsel, &error_abort);
}
return bsel_alloc;
@@ -471,7 +471,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
static void acpi_set_pci_info(void)
{
PCIBus *bus = find_i440fx(); /* TODO: Q35 support */
unsigned bsel_alloc = 0;
unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT;
if (bus) {
/* Scan all PCI buses. Set property to enable acpi based hotplug. */

View File

@@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s)
}
/* called when the inserted state of the media has changed */
static void ide_cd_change_cb(void *opaque, bool load)
static void ide_cd_change_cb(void *opaque, bool load, Error **errp)
{
IDEState *s = opaque;
uint64_t nb_sectors;

View File

@@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
return -1;
} else {
/* Anonymous BlockBackend for an empty drive */
dev->conf.blk = blk_new();
dev->conf.blk = blk_new(0, BLK_PERM_ALL);
}
}
@@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
return -1;
}
}
blkconf_apply_backend_options(&dev->conf);
blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD,
&err);
if (err) {
error_report_err(err);
return -1;
}
if (ide_init_drive(s, dev->conf.blk, kind,
dev->version, dev->serial, dev->model, dev->wwn,

View File

@@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o
obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o
obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o
obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o
obj-$(CONFIG_STELLARIS) += armv7m_nvic.o
obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o
obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o
obj-$(CONFIG_GRLIB) += grlib_irqmp.o
obj-$(CONFIG_IOAPIC) += ioapic.o

View File

@@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
}
};
static int icc_sre_el1_reg_pre_load(void *opaque)
{
GICv3CPUState *cs = opaque;
/*
* If the sre_el1 subsection is not transferred this
* means SRE_EL1 is 0x7 (which might not be the same as
* our reset value).
*/
cs->icc_sre_el1 = 0x7;
return 0;
}
static bool icc_sre_el1_reg_needed(void *opaque)
{
GICv3CPUState *cs = opaque;
return cs->icc_sre_el1 != 7;
}
const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
.name = "arm_gicv3_cpu/sre_el1",
.version_id = 1,
.minimum_version_id = 1,
.pre_load = icc_sre_el1_reg_pre_load,
.needed = icc_sre_el1_reg_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_gicv3_cpu = {
.name = "arm_gicv3_cpu",
.version_id = 1,
@@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
.subsections = (const VMStateDescription * []) {
&vmstate_gicv3_cpu_virt,
&vmstate_gicv3_cpu_sre_el1,
NULL
}
};
@@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
s->cpu[i].cpu = cpu;
s->cpu[i].gic = s;
/* Store GICv3CPUState in CPUARMState gicv3state pointer */
gicv3_set_gicv3state(cpu, &s->cpu[i]);
/* Pre-construct the GICR_TYPER:
* For our implementation:

View File

@@ -19,6 +19,14 @@
#include "gicv3_internal.h"
#include "cpu.h"
void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s)
{
ARMCPU *arm_cpu = ARM_CPU(cpu);
CPUARMState *env = &arm_cpu->env;
env->gicv3state = (void *)s;
};
static GICv3CPUState *icc_cs_from_env(CPUARMState *env)
{
/* Given the CPU, find the right GICv3CPUState struct.

View File

@@ -23,8 +23,10 @@
#include "qapi/error.h"
#include "hw/intc/arm_gicv3_common.h"
#include "hw/sysbus.h"
#include "qemu/error-report.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "gicv3_internal.h"
#include "vgic_common.h"
#include "migration/migration.h"
@@ -44,6 +46,32 @@
#define KVM_ARM_GICV3_GET_CLASS(obj) \
OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
#define KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \
(ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
#define ICC_PMR_EL1 \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
#define ICC_BPR0_EL1 \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
#define ICC_AP0R_EL1(n) \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
#define ICC_AP1R_EL1(n) \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
#define ICC_BPR1_EL1 \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
#define ICC_CTLR_EL1 \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
#define ICC_SRE_EL1 \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
#define ICC_IGRPEN0_EL1 \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
#define ICC_IGRPEN1_EL1 \
KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
typedef struct KVMARMGICv3Class {
ARMGICv3CommonClass parent_class;
DeviceRealize parent_realize;
@@ -57,16 +85,549 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level)
kvm_arm_gic_set_irq(s->num_irq, irq, level);
}
#define KVM_VGIC_ATTR(reg, typer) \
((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
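/* The device attribute encodes the register offset in its low bits and, for
* per-CPU (redistributor/sysreg) accesses, the target CPU's affinity value
* taken from its GICR_TYPER.
*/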
static inline void kvm_gicd_access(GICv3State *s, int offset,
uint32_t *val, bool write)
{
kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
KVM_VGIC_ATTR(offset, 0),
val, write);
}
static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
uint32_t *val, bool write)
{
kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
val, write);
}
static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
uint64_t *val, bool write)
{
kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
val, write);
}
static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
uint32_t *val, bool write)
{
kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
(VGIC_LEVEL_INFO_LINE_LEVEL <<
KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
val, write);
}
/* Loop through each distributor IRQ related register; since bits
* corresponding to SGIs and PPIs are RAZ/WI when affinity routing
* is enabled, we skip those.
*/
#define for_each_dist_irq_reg(_irq, _max, _field_width) \
for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
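/* e.g. with 8-bit priority fields this steps four interrupts (one 32-bit
* GICD_IPRIORITYR<n> register) per iteration, starting at the first SPI.
*/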
static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
{
uint32_t reg, *field;
int irq;
field = (uint32_t *)bmp;
for_each_dist_irq_reg(irq, s->num_irq, 8) {
kvm_gicd_access(s, offset, &reg, false);
*field = reg;
offset += 4;
field++;
}
}
static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
{
uint32_t reg, *field;
int irq;
field = (uint32_t *)bmp;
for_each_dist_irq_reg(irq, s->num_irq, 8) {
reg = *field;
kvm_gicd_access(s, offset, &reg, true);
offset += 4;
field++;
}
}
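/* GICD_ICFGRn holds two configuration bits per interrupt, with the
* edge/level bit in the odd position; the shuffle/unshuffle helpers below
* convert between that layout and the one-bit-per-interrupt edge_trigger
* bitmap kept in the QEMU GICv3 state.
*/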
static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
uint32_t *bmp)
{
uint32_t reg;
int irq;
for_each_dist_irq_reg(irq, s->num_irq, 2) {
kvm_gicd_access(s, offset, &reg, false);
reg = half_unshuffle32(reg >> 1);
if (irq % 32 != 0) {
reg = (reg << 16);
}
*gic_bmp_ptr32(bmp, irq) |= reg;
offset += 4;
}
}
static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
uint32_t *bmp)
{
uint32_t reg;
int irq;
for_each_dist_irq_reg(irq, s->num_irq, 2) {
reg = *gic_bmp_ptr32(bmp, irq);
if (irq % 32 != 0) {
reg = (reg & 0xffff0000) >> 16;
} else {
reg = reg & 0xffff;
}
reg = half_shuffle32(reg) << 1;
kvm_gicd_access(s, offset, &reg, true);
offset += 4;
}
}
static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp)
{
uint32_t reg;
int irq;
for_each_dist_irq_reg(irq, s->num_irq, 1) {
kvm_gic_line_level_access(s, irq, 0, &reg, false);
*gic_bmp_ptr32(bmp, irq) = reg;
}
}
static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp)
{
uint32_t reg;
int irq;
for_each_dist_irq_reg(irq, s->num_irq, 1) {
reg = *gic_bmp_ptr32(bmp, irq);
kvm_gic_line_level_access(s, irq, 0, &reg, true);
}
}
/* Read a bitmap register group from the kernel VGIC. */
static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
{
uint32_t reg;
int irq;
for_each_dist_irq_reg(irq, s->num_irq, 1) {
kvm_gicd_access(s, offset, &reg, false);
*gic_bmp_ptr32(bmp, irq) = reg;
offset += 4;
}
}
static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
uint32_t clroffset, uint32_t *bmp)
{
uint32_t reg;
int irq;
for_each_dist_irq_reg(irq, s->num_irq, 1) {
/* If this bitmap is a set/clear register pair, first write to the
* clear-reg to clear all bits before using the set-reg to write
* the 1 bits.
*/
if (clroffset != 0) {
reg = 0;
kvm_gicd_access(s, clroffset, &reg, true);
}
reg = *gic_bmp_ptr32(bmp, irq);
kvm_gicd_access(s, offset, &reg, true);
offset += 4;
}
}
static void kvm_arm_gicv3_check(GICv3State *s)
{
uint32_t reg;
uint32_t num_irq;
/* Sanity checking s->num_irq */
kvm_gicd_access(s, GICD_TYPER, &reg, false);
num_irq = ((reg & 0x1f) + 1) * 32;
if (num_irq < s->num_irq) {
error_report("Model requests %u IRQs, but kernel supports max %u",
s->num_irq, num_irq);
abort();
}
}
static void kvm_arm_gicv3_put(GICv3State *s)
{
/* TODO */
DPRINTF("Cannot put kernel gic state, no kernel interface\n");
uint32_t regl, regh, reg;
uint64_t reg64, redist_typer;
int ncpu, i;
kvm_arm_gicv3_check(s);
kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
redist_typer = ((uint64_t)regh << 32) | regl;
reg = s->gicd_ctlr;
kvm_gicd_access(s, GICD_CTLR, &reg, true);
if (redist_typer & GICR_TYPER_PLPIS) {
/* Set base addresses before LPIs are enabled by GICR_CTLR write */
for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
GICv3CPUState *c = &s->cpu[ncpu];
reg64 = c->gicr_propbaser;
regl = (uint32_t)reg64;
kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, true);
regh = (uint32_t)(reg64 >> 32);
kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, true);
reg64 = c->gicr_pendbaser;
if (!(c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS)) {
/* Setting PTZ is advised if LPIs are disabled, to reduce
* GIC initialization time.
*/
reg64 |= GICR_PENDBASER_PTZ;
}
regl = (uint32_t)reg64;
kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, true);
regh = (uint32_t)(reg64 >> 32);
kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, true);
}
}
/* Redistributor state (one per CPU) */
for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
GICv3CPUState *c = &s->cpu[ncpu];
reg = c->gicr_ctlr;
kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, true);
reg = c->gicr_statusr[GICV3_NS];
kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, true);
reg = c->gicr_waker;
kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, true);
reg = c->gicr_igroupr0;
kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, true);
reg = ~0;
kvm_gicr_access(s, GICR_ICENABLER0, ncpu, &reg, true);
reg = c->gicr_ienabler0;
kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, true);
/* Restore config before pending so we treat level/edge correctly */
reg = half_shuffle32(c->edge_trigger >> 16) << 1;
kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, true);
reg = c->level;
kvm_gic_line_level_access(s, 0, ncpu, &reg, true);
reg = ~0;
kvm_gicr_access(s, GICR_ICPENDR0, ncpu, &reg, true);
reg = c->gicr_ipendr0;
kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, true);
reg = ~0;
kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, &reg, true);
reg = c->gicr_iactiver0;
kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, true);
for (i = 0; i < GIC_INTERNAL; i += 4) {
reg = c->gicr_ipriorityr[i] |
(c->gicr_ipriorityr[i + 1] << 8) |
(c->gicr_ipriorityr[i + 2] << 16) |
(c->gicr_ipriorityr[i + 3] << 24);
kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, true);
}
}
/* Distributor state (shared between all CPUs) */
reg = s->gicd_statusr[GICV3_NS];
kvm_gicd_access(s, GICD_STATUSR, &reg, true);
/* s->enable bitmap -> GICD_ISENABLERn */
kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled);
/* s->group bitmap -> GICD_IGROUPRn */
kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group);
/* Restore targets before pending to ensure the pending state is set on
* the appropriate CPU interfaces in the kernel
*/
/* s->gicd_irouter[irq] -> GICD_IROUTERn
* We can't use kvm_dist_put() here because the registers are 64-bit
*/
for (i = GIC_INTERNAL; i < s->num_irq; i++) {
uint32_t offset;
offset = GICD_IROUTER + (sizeof(uint64_t) * i);
reg = (uint32_t)s->gicd_irouter[i];
kvm_gicd_access(s, offset, &reg, true);
offset = GICD_IROUTER + (sizeof(uint64_t) * i) + 4;
reg = (uint32_t)(s->gicd_irouter[i] >> 32);
kvm_gicd_access(s, offset, &reg, true);
}
/* s->trigger bitmap -> GICD_ICFGRn
* (restore configuration registers before pending IRQs so we treat
* level/edge correctly)
*/
kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
/* s->level bitmap -> line_level */
kvm_gic_put_line_level_bmp(s, s->level);
/* s->pending bitmap -> GICD_ISPENDRn */
kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending);
/* s->active bitmap -> GICD_ISACTIVERn */
kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active);
/* s->gicd_ipriority[] -> GICD_IPRIORITYRn */
kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
/* CPU Interface state (one per CPU) */
for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
GICv3CPUState *c = &s->cpu[ncpu];
int num_pri_bits;
kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true);
kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
&c->icc_ctlr_el1[GICV3_NS], true);
kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
&c->icc_igrpen[GICV3_G0], true);
kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
&c->icc_igrpen[GICV3_G1NS], true);
kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true);
kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true);
kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true);
num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
ICC_CTLR_EL1_PRIBITS_MASK) >>
ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
switch (num_pri_bits) {
case 7:
reg64 = c->icc_apr[GICV3_G0][3];
kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, true);
reg64 = c->icc_apr[GICV3_G0][2];
kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, true);
case 6:
reg64 = c->icc_apr[GICV3_G0][1];
kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, true);
default:
reg64 = c->icc_apr[GICV3_G0][0];
kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, true);
}
switch (num_pri_bits) {
case 7:
reg64 = c->icc_apr[GICV3_G1NS][3];
kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, true);
reg64 = c->icc_apr[GICV3_G1NS][2];
kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, true);
case 6:
reg64 = c->icc_apr[GICV3_G1NS][1];
kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, true);
default:
reg64 = c->icc_apr[GICV3_G1NS][0];
kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, true);
}
}
}
static void kvm_arm_gicv3_get(GICv3State *s)
{
/* TODO */
DPRINTF("Cannot get kernel gic state, no kernel interface\n");
uint32_t regl, regh, reg;
uint64_t reg64, redist_typer;
int ncpu, i;
kvm_arm_gicv3_check(s);
kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
redist_typer = ((uint64_t)regh << 32) | regl;
kvm_gicd_access(s, GICD_CTLR, &reg, false);
s->gicd_ctlr = reg;
/* Redistributor state (one per CPU) */
for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
GICv3CPUState *c = &s->cpu[ncpu];
kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, false);
c->gicr_ctlr = reg;
kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, false);
c->gicr_statusr[GICV3_NS] = reg;
kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, false);
c->gicr_waker = reg;
kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, false);
c->gicr_igroupr0 = reg;
kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, false);
c->gicr_ienabler0 = reg;
kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, false);
c->edge_trigger = half_unshuffle32(reg >> 1) << 16;
kvm_gic_line_level_access(s, 0, ncpu, &reg, false);
c->level = reg;
kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, false);
c->gicr_ipendr0 = reg;
kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, false);
c->gicr_iactiver0 = reg;
for (i = 0; i < GIC_INTERNAL; i += 4) {
kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, false);
c->gicr_ipriorityr[i] = extract32(reg, 0, 8);
c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8);
c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8);
c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8);
}
}
if (redist_typer & GICR_TYPER_PLPIS) {
for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
GICv3CPUState *c = &s->cpu[ncpu];
kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, false);
kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, false);
c->gicr_propbaser = ((uint64_t)regh << 32) | regl;
kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, false);
kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, false);
c->gicr_pendbaser = ((uint64_t)regh << 32) | regl;
}
}
/* Distributor state (shared between all CPUs) */
kvm_gicd_access(s, GICD_STATUSR, &reg, false);
s->gicd_statusr[GICV3_NS] = reg;
/* GICD_IGROUPRn -> s->group bitmap */
kvm_dist_getbmp(s, GICD_IGROUPR, s->group);
/* GICD_ISENABLERn -> s->enabled bitmap */
kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled);
/* Line level of irq */
kvm_gic_get_line_level_bmp(s, s->level);
/* GICD_ISPENDRn -> s->pending bitmap */
kvm_dist_getbmp(s, GICD_ISPENDR, s->pending);
/* GICD_ISACTIVERn -> s->active bitmap */
kvm_dist_getbmp(s, GICD_ISACTIVER, s->active);
/* GICD_ICFGRn -> s->trigger bitmap */
kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
/* GICD_IPRIORITYRn -> s->gicd_ipriority[] */
kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
/* GICD_IROUTERn -> s->gicd_irouter[irq] */
for (i = GIC_INTERNAL; i < s->num_irq; i++) {
uint32_t offset;
offset = GICD_IROUTER + (sizeof(uint64_t) * i);
kvm_gicd_access(s, offset, &regl, false);
offset = GICD_IROUTER + (sizeof(uint64_t) * i) + 4;
kvm_gicd_access(s, offset, &regh, false);
s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl;
}
/*****************************************************************
* CPU Interface(s) State
*/
for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
GICv3CPUState *c = &s->cpu[ncpu];
int num_pri_bits;
kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false);
kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
&c->icc_ctlr_el1[GICV3_NS], false);
kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
&c->icc_igrpen[GICV3_G0], false);
kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
&c->icc_igrpen[GICV3_G1NS], false);
kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false);
kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false);
kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false);
num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
ICC_CTLR_EL1_PRIBITS_MASK) >>
ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
switch (num_pri_bits) {
case 7:
kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, false);
c->icc_apr[GICV3_G0][3] = reg64;
kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, false);
c->icc_apr[GICV3_G0][2] = reg64;
case 6:
kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, false);
c->icc_apr[GICV3_G0][1] = reg64;
default:
kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, false);
c->icc_apr[GICV3_G0][0] = reg64;
}
switch (num_pri_bits) {
case 7:
kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, false);
c->icc_apr[GICV3_G1NS][3] = reg64;
kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, false);
c->icc_apr[GICV3_G1NS][2] = reg64;
case 6:
kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, false);
c->icc_apr[GICV3_G1NS][1] = reg64;
default:
kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, false);
c->icc_apr[GICV3_G1NS][0] = reg64;
}
}
}
static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
ARMCPU *cpu;
GICv3State *s;
GICv3CPUState *c;
c = (GICv3CPUState *)env->gicv3state;
s = c->gic;
cpu = ARM_CPU(c->cpu);
/* Initialize to actual HW supported configuration */
kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
&c->icc_ctlr_el1[GICV3_NS], false);
c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
c->icc_pmr_el1 = 0;
c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
c->icc_sre_el1 = 0x7;
memset(c->icc_apr, 0, sizeof(c->icc_apr));
memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
}
static void kvm_arm_gicv3_reset(DeviceState *dev)
@@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
DPRINTF("Reset\n");
kgc->parent_reset(dev);
if (s->migration_blocker) {
DPRINTF("Cannot put kernel gic state, no kernel interface\n");
return;
}
kvm_arm_gicv3_put(s);
}
/*
* The CPU interface registers of the GIC need to be reset on CPU reset.
* To get arm_gicv3_icc_reset() called on CPU reset we register the
* ARMCPRegInfo below. Since we reset the whole CPU interface from a single
* register's reset hook, we define only one CPU interface register here
* instead of defining all of them.
*/
static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
{ .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
/*
* If ARM_CP_NOP is used, resetfn is not called, so ARM_CP_NO_RAW
* is the appropriate type.
*/
.type = ARM_CP_NO_RAW,
.access = PL1_RW,
.readfn = arm_cp_read_zero,
.writefn = arm_cp_write_ignore,
/*
* We hang the whole cpu interface reset routine off here
* rather than parcelling it out into one little function
* per register
*/
.resetfn = arm_gicv3_icc_reset,
},
REGINFO_SENTINEL
};
static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
{
GICv3State *s = KVM_ARM_GICV3(dev);
@@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
/* Block migration of a KVM GICv3 device: the API for saving and restoring
* the state in the kernel is not yet finalised in the kernel or
* implemented in QEMU.
*/
error_setg(&s->migration_blocker, "vGICv3 migration is not implemented");
migrate_add_blocker(s->migration_blocker, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_free(s->migration_blocker);
return;
for (i = 0; i < s->num_cpu; i++) {
ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
}
/* Try to create the device via the device control API */
@@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
kvm_irqchip_commit_routes(kvm_state);
}
if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
GICD_CTLR)) {
error_setg(&s->migration_blocker, "This operating system kernel does "
"not support vGICv3 migration");
migrate_add_blocker(s->migration_blocker, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_free(s->migration_blocker);
return;
}
}
}
static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data)

View File

@@ -17,8 +17,8 @@
#include "hw/sysbus.h"
#include "qemu/timer.h"
#include "hw/arm/arm.h"
#include "hw/arm/armv7m_nvic.h"
#include "target/arm/cpu.h"
#include "exec/address-spaces.h"
#include "qemu/log.h"
#include "trace.h"
@@ -47,7 +47,6 @@
* "exception" more or less interchangeably.
*/
#define NVIC_FIRST_IRQ 16
#define NVIC_MAX_VECTORS 512
#define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ)
/* Effective running priority of the CPU when no exception is active
@@ -55,116 +54,10 @@
*/
#define NVIC_NOEXC_PRIO 0x100
typedef struct VecInfo {
/* Exception priorities can range from -3 to 255; only the unmodifiable
* priority values for RESET, NMI and HardFault can be negative.
*/
int16_t prio;
uint8_t enabled;
uint8_t pending;
uint8_t active;
uint8_t level; /* exceptions <=15 never set level */
} VecInfo;
typedef struct NVICState {
/*< private >*/
SysBusDevice parent_obj;
/*< public >*/
ARMCPU *cpu;
VecInfo vectors[NVIC_MAX_VECTORS];
uint32_t prigroup;
/* vectpending and exception_prio are both cached state that can
* be recalculated from the vectors[] array and the prigroup field.
*/
unsigned int vectpending; /* highest prio pending enabled exception */
int exception_prio; /* group prio of the highest prio active exception */
struct {
uint32_t control;
uint32_t reload;
int64_t tick;
QEMUTimer *timer;
} systick;
MemoryRegion sysregmem;
MemoryRegion container;
uint32_t num_irq;
qemu_irq excpout;
qemu_irq sysresetreq;
} NVICState;
#define TYPE_NVIC "armv7m_nvic"
#define NVIC(obj) \
OBJECT_CHECK(NVICState, (obj), TYPE_NVIC)
static const uint8_t nvic_id[] = {
0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1
};
/* qemu timers run at 1GHz. We want something closer to 1MHz. */
#define SYSTICK_SCALE 1000ULL
#define SYSTICK_ENABLE (1 << 0)
#define SYSTICK_TICKINT (1 << 1)
#define SYSTICK_CLKSOURCE (1 << 2)
#define SYSTICK_COUNTFLAG (1 << 16)
int system_clock_scale;
/* Conversion factor from qemu timer to SysTick frequencies. */
static inline int64_t systick_scale(NVICState *s)
{
if (s->systick.control & SYSTICK_CLKSOURCE)
return system_clock_scale;
else
return 1000;
}
static void systick_reload(NVICState *s, int reset)
{
/* The Cortex-M3 Devices Generic User Guide says that "When the
* ENABLE bit is set to 1, the counter loads the RELOAD value from the
* SYST RVR register and then counts down". So, we need to check the
* ENABLE bit before reloading the value.
*/
if ((s->systick.control & SYSTICK_ENABLE) == 0) {
return;
}
if (reset)
s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
s->systick.tick += (s->systick.reload + 1) * systick_scale(s);
timer_mod(s->systick.timer, s->systick.tick);
}
static void systick_timer_tick(void * opaque)
{
NVICState *s = (NVICState *)opaque;
s->systick.control |= SYSTICK_COUNTFLAG;
if (s->systick.control & SYSTICK_TICKINT) {
/* Trigger the interrupt. */
armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
}
if (s->systick.reload == 0) {
s->systick.control &= ~SYSTICK_ENABLE;
} else {
systick_reload(s, 0);
}
}
static void systick_reset(NVICState *s)
{
s->systick.control = 0;
s->systick.reload = 0;
s->systick.tick = 0;
timer_del(s->systick.timer);
}
static int nvic_pending_prio(NVICState *s)
{
/* return the priority of the current pending interrupt,
@@ -510,30 +403,6 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset)
switch (offset) {
case 4: /* Interrupt Control Type. */
return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1;
case 0x10: /* SysTick Control and Status. */
val = s->systick.control;
s->systick.control &= ~SYSTICK_COUNTFLAG;
return val;
case 0x14: /* SysTick Reload Value. */
return s->systick.reload;
case 0x18: /* SysTick Current Value. */
{
int64_t t;
if ((s->systick.control & SYSTICK_ENABLE) == 0)
return 0;
t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
if (t >= s->systick.tick)
return 0;
val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1;
/* The interrupt is triggered when the timer reaches zero.
However, the counter is not reloaded until the next clock
tick. This is a hack to return zero during the first tick. */
if (val > s->systick.reload)
val = 0;
return val;
}
case 0x1c: /* SysTick Calibration Value. */
return 10000;
case 0xd00: /* CPUID Base. */
return cpu->midr;
case 0xd04: /* Interrupt Control State. */
@@ -668,40 +537,8 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset)
static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value)
{
ARMCPU *cpu = s->cpu;
uint32_t oldval;
switch (offset) {
case 0x10: /* SysTick Control and Status. */
oldval = s->systick.control;
s->systick.control &= 0xfffffff8;
s->systick.control |= value & 7;
if ((oldval ^ value) & SYSTICK_ENABLE) {
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
if (value & SYSTICK_ENABLE) {
if (s->systick.tick) {
s->systick.tick += now;
timer_mod(s->systick.timer, s->systick.tick);
} else {
systick_reload(s, 1);
}
} else {
timer_del(s->systick.timer);
s->systick.tick -= now;
if (s->systick.tick < 0)
s->systick.tick = 0;
}
} else if ((oldval ^ value) & SYSTICK_CLKSOURCE) {
/* This is a hack. Force the timer to be reloaded
when the reference clock is changed. */
systick_reload(s, 1);
}
break;
case 0x14: /* SysTick Reload Value. */
s->systick.reload = value;
break;
case 0x18: /* SysTick Current Value. Writes reload the timer. */
systick_reload(s, 1);
s->systick.control &= ~SYSTICK_COUNTFLAG;
break;
case 0xd04: /* Interrupt Control State. */
if (value & (1 << 31)) {
armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI);
@@ -1000,16 +837,12 @@ static const VMStateDescription vmstate_VecInfo = {
static const VMStateDescription vmstate_nvic = {
.name = "armv7m_nvic",
.version_id = 3,
.minimum_version_id = 3,
.version_id = 4,
.minimum_version_id = 4,
.post_load = &nvic_post_load,
.fields = (VMStateField[]) {
VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1,
vmstate_VecInfo, VecInfo),
VMSTATE_UINT32(systick.control, NVICState),
VMSTATE_UINT32(systick.reload, NVICState),
VMSTATE_INT64(systick.tick, NVICState),
VMSTATE_TIMER_PTR(systick.timer, NVICState),
VMSTATE_UINT32(prigroup, NVICState),
VMSTATE_END_OF_LIST()
}
@@ -1047,13 +880,26 @@ static void armv7m_nvic_reset(DeviceState *dev)
s->exception_prio = NVIC_NOEXC_PRIO;
s->vectpending = 0;
}
systick_reset(s);
static void nvic_systick_trigger(void *opaque, int n, int level)
{
NVICState *s = opaque;
if (level) {
/* SysTick just asked us to pend its exception.
* (This is different from an external interrupt line's
* behaviour.)
*/
armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
}
}
static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
{
NVICState *s = NVIC(dev);
SysBusDevice *systick_sbd;
Error *err = NULL;
s->cpu = ARM_CPU(qemu_get_cpu(0));
assert(s->cpu);
@@ -1068,10 +914,19 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
/* include space for internal exception vectors */
s->num_irq += NVIC_FIRST_IRQ;
object_property_set_bool(OBJECT(&s->systick), true, "realized", &err);
if (err != NULL) {
error_propagate(errp, err);
return;
}
systick_sbd = SYS_BUS_DEVICE(&s->systick);
sysbus_connect_irq(systick_sbd, 0,
qdev_get_gpio_in_named(dev, "systick-trigger", 0));
/* The NVIC and System Control Space (SCS) starts at 0xe000e000
* and looks like this:
* 0x004 - ICTR
* 0x010 - 0x1c - systick
* 0x010 - 0xff - systick
* 0x100..0x7ec - NVIC
* 0x7f0..0xcff - Reserved
* 0xd00..0xd3c - SCS registers
@@ -1089,12 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s,
"nvic_sysregs", 0x1000);
memory_region_add_subregion(&s->container, 0, &s->sysregmem);
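/* The SysTick register block is overlaid on the NVIC sysreg region at
* offset 0x10 with higher priority, so its registers take precedence there.
*/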
memory_region_add_subregion_overlap(&s->container, 0x10,
sysbus_mmio_get_region(systick_sbd, 0),
1);
/* Map the whole thing into system memory at the location required
* by the v7M architecture.
*/
memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container);
s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s);
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container);
}
static void armv7m_nvic_instance_init(Object *obj)
@@ -1109,8 +963,12 @@ static void armv7m_nvic_instance_init(Object *obj)
NVICState *nvic = NVIC(obj);
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK);
qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default());
sysbus_init_irq(sbd, &nvic->excpout);
qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1);
qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1);
}
static void armv7m_nvic_class_init(ObjectClass *klass, void *data)

View File

@@ -138,6 +138,7 @@
#define ICC_CTLR_EL1_EOIMODE (1U << 1)
#define ICC_CTLR_EL1_PMHE (1U << 6)
#define ICC_CTLR_EL1_PRIBITS_SHIFT 8
#define ICC_CTLR_EL1_PRIBITS_MASK (7U << ICC_CTLR_EL1_PRIBITS_SHIFT)
#define ICC_CTLR_EL1_IDBITS_SHIFT 11
#define ICC_CTLR_EL1_SEIS (1U << 14)
#define ICC_CTLR_EL1_A3V (1U << 15)
@@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s)
}
}
void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s);
#endif /* QEMU_ARM_GICV3_INTERNAL_H */

View File

@@ -49,40 +49,41 @@ int xics_get_cpu_index_by_dt_id(int cpu_dt_id)
return -1;
}
void xics_cpu_destroy(XICSState *xics, PowerPCCPU *cpu)
void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
ICPState *ss = &xics->ss[cs->cpu_index];
ICPState *icp = xics_icp_get(xi, cs->cpu_index);
assert(cs->cpu_index < xics->nr_servers);
assert(cs == ss->cs);
assert(icp);
assert(cs == icp->cs);
ss->output = NULL;
ss->cs = NULL;
icp->output = NULL;
icp->cs = NULL;
}
void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
ICPState *ss = &xics->ss[cs->cpu_index];
XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
ICPState *icp = xics_icp_get(xi, cs->cpu_index);
ICPStateClass *icpc;
assert(cs->cpu_index < xics->nr_servers);
assert(icp);
ss->cs = cs;
icp->cs = cs;
if (info->cpu_setup) {
info->cpu_setup(xics, cpu);
icpc = ICP_GET_CLASS(icp);
if (icpc->cpu_setup) {
icpc->cpu_setup(icp, cpu);
}
switch (PPC_INPUT(env)) {
case PPC_FLAGS_INPUT_POWER7:
ss->output = env->irq_inputs[POWER7_INPUT_INT];
icp->output = env->irq_inputs[POWER7_INPUT_INT];
break;
case PPC_FLAGS_INPUT_970:
ss->output = env->irq_inputs[PPC970_INPUT_INT];
icp->output = env->irq_inputs[PPC970_INPUT_INT];
break;
default:
@@ -92,185 +93,43 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
}
}
static void xics_common_pic_print_info(InterruptStatsProvider *obj,
Monitor *mon)
void icp_pic_print_info(ICPState *icp, Monitor *mon)
{
int cpu_index = icp->cs ? icp->cs->cpu_index : -1;
if (!icp->output) {
return;
}
monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
cpu_index, icp->xirr, icp->xirr_owner,
icp->pending_priority, icp->mfrr);
}
void ics_pic_print_info(ICSState *ics, Monitor *mon)
{
XICSState *xics = XICS_COMMON(obj);
ICSState *ics;
uint32_t i;
for (i = 0; i < xics->nr_servers; i++) {
ICPState *icp = &xics->ss[i];
monitor_printf(mon, "ICS %4x..%4x %p\n",
ics->offset, ics->offset + ics->nr_irqs - 1, ics);
if (!icp->output) {
if (!ics->irqs) {
return;
}
for (i = 0; i < ics->nr_irqs; i++) {
ICSIRQState *irq = ics->irqs + i;
if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
continue;
}
monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
i, icp->xirr, icp->xirr_owner,
icp->pending_priority, icp->mfrr);
}
QLIST_FOREACH(ics, &xics->ics, list) {
monitor_printf(mon, "ICS %4x..%4x %p\n",
ics->offset, ics->offset + ics->nr_irqs - 1, ics);
if (!ics->irqs) {
continue;
}
for (i = 0; i < ics->nr_irqs; i++) {
ICSIRQState *irq = ics->irqs + i;
if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
continue;
}
monitor_printf(mon, " %4x %s %02x %02x\n",
ics->offset + i,
(irq->flags & XICS_FLAGS_IRQ_LSI) ?
"LSI" : "MSI",
irq->priority, irq->status);
}
monitor_printf(mon, " %4x %s %02x %02x\n",
ics->offset + i,
(irq->flags & XICS_FLAGS_IRQ_LSI) ?
"LSI" : "MSI",
irq->priority, irq->status);
}
}
/*
* XICS Common class - parent for emulated XICS and KVM-XICS
*/
static void xics_common_reset(DeviceState *d)
{
XICSState *xics = XICS_COMMON(d);
ICSState *ics;
int i;
for (i = 0; i < xics->nr_servers; i++) {
device_reset(DEVICE(&xics->ss[i]));
}
QLIST_FOREACH(ics, &xics->ics, list) {
device_reset(DEVICE(ics));
}
}
static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
XICSState *xics = XICS_COMMON(obj);
int64_t value = xics->nr_irqs;
visit_type_int(v, name, &value, errp);
}
static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
XICSState *xics = XICS_COMMON(obj);
XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
Error *error = NULL;
int64_t value;
visit_type_int(v, name, &value, &error);
if (error) {
error_propagate(errp, error);
return;
}
if (xics->nr_irqs) {
error_setg(errp, "Number of interrupts is already set to %u",
xics->nr_irqs);
return;
}
assert(info->set_nr_irqs);
info->set_nr_irqs(xics, value, errp);
}
void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
const char *typename, Error **errp)
{
int i;
xics->nr_servers = nr_servers;
xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
for (i = 0; i < xics->nr_servers; i++) {
char name[32];
ICPState *icp = &xics->ss[i];
object_initialize(icp, sizeof(*icp), typename);
snprintf(name, sizeof(name), "icp[%d]", i);
object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp);
icp->xics = xics;
}
}
static void xics_prop_get_nr_servers(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
XICSState *xics = XICS_COMMON(obj);
int64_t value = xics->nr_servers;
visit_type_int(v, name, &value, errp);
}
static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
XICSState *xics = XICS_COMMON(obj);
XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics);
Error *error = NULL;
int64_t value;
visit_type_int(v, name, &value, &error);
if (error) {
error_propagate(errp, error);
return;
}
if (xics->nr_servers) {
error_setg(errp, "Number of servers is already set to %u",
xics->nr_servers);
return;
}
assert(xsc->set_nr_servers);
xsc->set_nr_servers(xics, value, errp);
}
static void xics_common_initfn(Object *obj)
{
XICSState *xics = XICS_COMMON(obj);
QLIST_INIT(&xics->ics);
object_property_add(obj, "nr_irqs", "int",
xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
NULL, NULL, NULL);
object_property_add(obj, "nr_servers", "int",
xics_prop_get_nr_servers, xics_prop_set_nr_servers,
NULL, NULL, NULL);
}
static void xics_common_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc);
dc->reset = xics_common_reset;
ic->print_info = xics_common_pic_print_info;
}
static const TypeInfo xics_common_info = {
.name = TYPE_XICS_COMMON,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(XICSState),
.class_size = sizeof(XICSStateClass),
.instance_init = xics_common_initfn,
.class_init = xics_common_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_INTERRUPT_STATS_PROVIDER },
{ }
},
};
/*
* ICP: Presentation layer
*/
@@ -278,8 +137,8 @@ static const TypeInfo xics_common_info = {
#define XISR_MASK 0x00ffffff
#define CPPR_MASK 0xff000000
#define XISR(ss) (((ss)->xirr) & XISR_MASK)
#define CPPR(ss) (((ss)->xirr) >> 24)
#define XISR(icp) (((icp)->xirr) & XISR_MASK)
#define CPPR(icp) (((icp)->xirr) >> 24)
static void ics_reject(ICSState *ics, uint32_t nr)
{
@@ -290,7 +149,7 @@ static void ics_reject(ICSState *ics, uint32_t nr)
}
}
static void ics_resend(ICSState *ics)
void ics_resend(ICSState *ics)
{
ICSStateClass *k = ICS_BASE_GET_CLASS(ics);
@@ -308,151 +167,152 @@ static void ics_eoi(ICSState *ics, int nr)
}
}
static void icp_check_ipi(ICPState *ss)
static void icp_check_ipi(ICPState *icp)
{
if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
if (XISR(icp) && (icp->pending_priority <= icp->mfrr)) {
return;
}
trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
trace_xics_icp_check_ipi(icp->cs->cpu_index, icp->mfrr);
if (XISR(ss) && ss->xirr_owner) {
ics_reject(ss->xirr_owner, XISR(ss));
if (XISR(icp) && icp->xirr_owner) {
ics_reject(icp->xirr_owner, XISR(icp));
}
ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
ss->pending_priority = ss->mfrr;
ss->xirr_owner = NULL;
qemu_irq_raise(ss->output);
icp->xirr = (icp->xirr & ~XISR_MASK) | XICS_IPI;
icp->pending_priority = icp->mfrr;
icp->xirr_owner = NULL;
qemu_irq_raise(icp->output);
}
static void icp_resend(ICPState *ss)
void icp_resend(ICPState *icp)
{
ICSState *ics;
XICSFabric *xi = icp->xics;
XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
if (ss->mfrr < CPPR(ss)) {
icp_check_ipi(ss);
}
QLIST_FOREACH(ics, &ss->xics->ics, list) {
ics_resend(ics);
if (icp->mfrr < CPPR(icp)) {
icp_check_ipi(icp);
}
xic->ics_resend(xi);
}
void icp_set_cppr(ICPState *ss, uint8_t cppr)
void icp_set_cppr(ICPState *icp, uint8_t cppr)
{
uint8_t old_cppr;
uint32_t old_xisr;
old_cppr = CPPR(ss);
ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
old_cppr = CPPR(icp);
icp->xirr = (icp->xirr & ~CPPR_MASK) | (cppr << 24);
if (cppr < old_cppr) {
if (XISR(ss) && (cppr <= ss->pending_priority)) {
old_xisr = XISR(ss);
ss->xirr &= ~XISR_MASK; /* Clear XISR */
ss->pending_priority = 0xff;
qemu_irq_lower(ss->output);
if (ss->xirr_owner) {
ics_reject(ss->xirr_owner, old_xisr);
ss->xirr_owner = NULL;
if (XISR(icp) && (cppr <= icp->pending_priority)) {
old_xisr = XISR(icp);
icp->xirr &= ~XISR_MASK; /* Clear XISR */
icp->pending_priority = 0xff;
qemu_irq_lower(icp->output);
if (icp->xirr_owner) {
ics_reject(icp->xirr_owner, old_xisr);
icp->xirr_owner = NULL;
}
}
} else {
if (!XISR(ss)) {
icp_resend(ss);
if (!XISR(icp)) {
icp_resend(icp);
}
}
}
void icp_set_mfrr(ICPState *ss, uint8_t mfrr)
void icp_set_mfrr(ICPState *icp, uint8_t mfrr)
{
ss->mfrr = mfrr;
if (mfrr < CPPR(ss)) {
icp_check_ipi(ss);
icp->mfrr = mfrr;
if (mfrr < CPPR(icp)) {
icp_check_ipi(icp);
}
}
uint32_t icp_accept(ICPState *ss)
uint32_t icp_accept(ICPState *icp)
{
uint32_t xirr = ss->xirr;
uint32_t xirr = icp->xirr;
qemu_irq_lower(ss->output);
ss->xirr = ss->pending_priority << 24;
ss->pending_priority = 0xff;
ss->xirr_owner = NULL;
qemu_irq_lower(icp->output);
icp->xirr = icp->pending_priority << 24;
icp->pending_priority = 0xff;
icp->xirr_owner = NULL;
trace_xics_icp_accept(xirr, ss->xirr);
trace_xics_icp_accept(xirr, icp->xirr);
return xirr;
}
uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
uint32_t icp_ipoll(ICPState *icp, uint32_t *mfrr)
{
if (mfrr) {
*mfrr = ss->mfrr;
*mfrr = icp->mfrr;
}
return ss->xirr;
return icp->xirr;
}
void icp_eoi(ICPState *ss, uint32_t xirr)
void icp_eoi(ICPState *icp, uint32_t xirr)
{
XICSFabric *xi = icp->xics;
XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
ICSState *ics;
uint32_t irq;
/* Send EOI -> ICS */
ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr);
icp->xirr = (icp->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
trace_xics_icp_eoi(icp->cs->cpu_index, xirr, icp->xirr);
irq = xirr & XISR_MASK;
QLIST_FOREACH(ics, &ss->xics->ics, list) {
if (ics_valid_irq(ics, irq)) {
ics_eoi(ics, irq);
}
ics = xic->ics_get(xi, irq);
if (ics) {
ics_eoi(ics, irq);
}
if (!XISR(ss)) {
icp_resend(ss);
if (!XISR(icp)) {
icp_resend(icp);
}
}
static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
{
XICSState *xics = ics->xics;
ICPState *ss = xics->ss + server;
ICPState *icp = xics_icp_get(ics->xics, server);
trace_xics_icp_irq(server, nr, priority);
if ((priority >= CPPR(ss))
|| (XISR(ss) && (ss->pending_priority <= priority))) {
if ((priority >= CPPR(icp))
|| (XISR(icp) && (icp->pending_priority <= priority))) {
ics_reject(ics, nr);
} else {
if (XISR(ss) && ss->xirr_owner) {
ics_reject(ss->xirr_owner, XISR(ss));
ss->xirr_owner = NULL;
if (XISR(icp) && icp->xirr_owner) {
ics_reject(icp->xirr_owner, XISR(icp));
icp->xirr_owner = NULL;
}
ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
ss->xirr_owner = ics;
ss->pending_priority = priority;
trace_xics_icp_raise(ss->xirr, ss->pending_priority);
qemu_irq_raise(ss->output);
icp->xirr = (icp->xirr & ~XISR_MASK) | (nr & XISR_MASK);
icp->xirr_owner = ics;
icp->pending_priority = priority;
trace_xics_icp_raise(icp->xirr, icp->pending_priority);
qemu_irq_raise(icp->output);
}
}
static void icp_dispatch_pre_save(void *opaque)
{
ICPState *ss = opaque;
ICPStateClass *info = ICP_GET_CLASS(ss);
ICPState *icp = opaque;
ICPStateClass *info = ICP_GET_CLASS(icp);
if (info->pre_save) {
info->pre_save(ss);
info->pre_save(icp);
}
}
static int icp_dispatch_post_load(void *opaque, int version_id)
{
ICPState *ss = opaque;
ICPStateClass *info = ICP_GET_CLASS(ss);
ICPState *icp = opaque;
ICPStateClass *info = ICP_GET_CLASS(icp);
if (info->post_load) {
return info->post_load(ss, version_id);
return info->post_load(icp, version_id);
}
return 0;
@@ -485,12 +345,30 @@ static void icp_reset(DeviceState *dev)
qemu_set_irq(icp->output, 0);
}
static void icp_realize(DeviceState *dev, Error **errp)
{
ICPState *icp = ICP(dev);
Object *obj;
Error *err = NULL;
obj = object_property_get_link(OBJECT(dev), "xics", &err);
if (!obj) {
error_setg(errp, "%s: required link 'xics' not found: %s",
__func__, error_get_pretty(err));
return;
}
icp->xics = XICS_FABRIC(obj);
}
static void icp_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->reset = icp_reset;
dc->vmsd = &vmstate_icp_server;
dc->realize = icp_realize;
}
static const TypeInfo icp_info = {
@@ -663,17 +541,6 @@ static void ics_simple_reset(DeviceState *dev)
}
}
static int ics_simple_post_load(ICSState *ics, int version_id)
{
int i;
for (i = 0; i < ics->xics->nr_servers; i++) {
icp_resend(&ics->xics->ss[i]);
}
return 0;
}
static void ics_simple_dispatch_pre_save(void *opaque)
{
ICSState *ics = opaque;
@@ -746,15 +613,20 @@ static void ics_simple_realize(DeviceState *dev, Error **errp)
ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
}
static Property ics_simple_properties[] = {
DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
DEFINE_PROP_END_OF_LIST(),
};
static void ics_simple_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
ICSStateClass *isc = ICS_BASE_CLASS(klass);
dc->realize = ics_simple_realize;
isc->realize = ics_simple_realize;
dc->props = ics_simple_properties;
dc->vmsd = &vmstate_ics_simple;
dc->reset = ics_simple_reset;
isc->post_load = ics_simple_post_load;
isc->reject = ics_simple_reject;
isc->resend = ics_simple_resend;
isc->eoi = ics_simple_eoi;
@@ -769,32 +641,56 @@ static const TypeInfo ics_simple_info = {
.instance_init = ics_simple_initfn,
};
static void ics_base_realize(DeviceState *dev, Error **errp)
{
ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
ICSState *ics = ICS_BASE(dev);
Object *obj;
Error *err = NULL;
obj = object_property_get_link(OBJECT(dev), "xics", &err);
if (!obj) {
error_setg(errp, "%s: required link 'xics' not found: %s",
__func__, error_get_pretty(err));
return;
}
ics->xics = XICS_FABRIC(obj);
if (icsc->realize) {
icsc->realize(dev, errp);
}
}
static void ics_base_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = ics_base_realize;
}
static const TypeInfo ics_base_info = {
.name = TYPE_ICS_BASE,
.parent = TYPE_DEVICE,
.abstract = true,
.instance_size = sizeof(ICSState),
.class_init = ics_base_class_init,
.class_size = sizeof(ICSStateClass),
};
static const TypeInfo xics_fabric_info = {
.name = TYPE_XICS_FABRIC,
.parent = TYPE_INTERFACE,
.class_size = sizeof(XICSFabricClass),
};
/*
* Exported functions
*/
ICSState *xics_find_source(XICSState *xics, int irq)
qemu_irq xics_get_qirq(XICSFabric *xi, int irq)
{
ICSState *ics;
QLIST_FOREACH(ics, &xics->ics, list) {
if (ics_valid_irq(ics, irq)) {
return ics;
}
}
return NULL;
}
qemu_irq xics_get_qirq(XICSState *xics, int irq)
{
ICSState *ics = xics_find_source(xics, irq);
XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
ICSState *ics = xic->ics_get(xi, irq);
if (ics) {
return ics->qirqs[irq - ics->offset];
@@ -803,6 +699,13 @@ qemu_irq xics_get_qirq(XICSState *xics, int irq)
return NULL;
}
ICPState *xics_icp_get(XICSFabric *xi, int server)
{
XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
return xic->icp_get(xi, server);
}
void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
{
assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
@@ -813,10 +716,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
static void xics_register_types(void)
{
type_register_static(&xics_common_info);
type_register_static(&ics_simple_info);
type_register_static(&ics_base_info);
type_register_static(&icp_info);
type_register_static(&xics_fabric_info);
}
type_init(xics_register_types)


@@ -40,16 +40,12 @@
#include <sys/ioctl.h>
typedef struct KVMXICSState {
XICSState parent_obj;
int kernel_xics_fd;
} KVMXICSState;
static int kernel_xics_fd = -1;
/*
* ICP-KVM
*/
static void icp_get_kvm_state(ICPState *ss)
static void icp_get_kvm_state(ICPState *icp)
{
uint64_t state;
struct kvm_one_reg reg = {
@@ -59,25 +55,25 @@ static void icp_get_kvm_state(ICPState *ss)
int ret;
/* ICP for this CPU thread is not in use, exiting */
if (!ss->cs) {
if (!icp->cs) {
return;
}
ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg);
ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, &reg);
if (ret != 0) {
error_report("Unable to retrieve KVM interrupt controller state"
" for CPU %ld: %s", kvm_arch_vcpu_id(ss->cs), strerror(errno));
" for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno));
exit(1);
}
ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
& KVM_REG_PPC_ICP_MFRR_MASK;
ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
& KVM_REG_PPC_ICP_PPRI_MASK;
}
static int icp_set_kvm_state(ICPState *ss, int version_id)
static int icp_set_kvm_state(ICPState *icp, int version_id)
{
uint64_t state;
struct kvm_one_reg reg = {
@@ -87,18 +83,18 @@ static int icp_set_kvm_state(ICPState *ss, int version_id)
int ret;
/* ICP for this CPU thread is not in use, exiting */
if (!ss->cs) {
if (!icp->cs) {
return 0;
}
state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
| ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
| ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
| ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
| ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, &reg);
if (ret != 0) {
error_report("Unable to restore KVM interrupt controller state (0x%"
PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(ss->cs),
PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs),
strerror(errno));
return ret;
}
@@ -122,6 +118,34 @@ static void icp_kvm_reset(DeviceState *dev)
icp_set_kvm_state(icp, 1);
}
static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
int ret;
if (kernel_xics_fd == -1) {
abort();
}
/*
* If we are reusing a parked vCPU fd corresponding to the CPU
* which was hot-removed earlier, we don't have to re-enable
* KVM_CAP_IRQ_XICS capability again.
*/
if (icp->cap_irq_xics_enabled) {
return;
}
ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
kvm_arch_vcpu_id(cs));
if (ret < 0) {
error_report("Unable to connect CPU%ld to kernel XICS: %s",
kvm_arch_vcpu_id(cs), strerror(errno));
exit(1);
}
icp->cap_irq_xics_enabled = true;
}
static void icp_kvm_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -130,6 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void *data)
dc->reset = icp_kvm_reset;
icpc->pre_save = icp_get_kvm_state;
icpc->post_load = icp_set_kvm_state;
icpc->cpu_setup = icp_kvm_cpu_setup;
}
static const TypeInfo icp_kvm_info = {
@@ -145,7 +170,6 @@ static const TypeInfo icp_kvm_info = {
*/
static void ics_get_kvm_state(ICSState *ics)
{
KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
uint64_t state;
struct kvm_device_attr attr = {
.flags = 0,
@@ -160,7 +184,7 @@ static void ics_get_kvm_state(ICSState *ics)
attr.attr = i + ics->offset;
ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
if (ret != 0) {
error_report("Unable to retrieve KVM interrupt controller state"
" for IRQ %d: %s", i + ics->offset, strerror(errno));
@@ -204,7 +228,6 @@ static void ics_get_kvm_state(ICSState *ics)
static int ics_set_kvm_state(ICSState *ics, int version_id)
{
KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
uint64_t state;
struct kvm_device_attr attr = {
.flags = 0,
@@ -238,7 +261,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
}
}
ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
if (ret != 0) {
error_report("Unable to restore KVM interrupt controller state"
" for IRQs %d: %s", i + ics->offset, strerror(errno));
@@ -308,7 +331,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data)
DeviceClass *dc = DEVICE_CLASS(klass);
ICSStateClass *icsc = ICS_BASE_CLASS(klass);
dc->realize = ics_kvm_realize;
icsc->realize = ics_kvm_realize;
dc->reset = ics_kvm_reset;
icsc->pre_save = ics_get_kvm_state;
icsc->post_load = ics_set_kvm_state;
@@ -324,57 +347,6 @@ static const TypeInfo ics_kvm_info = {
/*
* XICS-KVM
*/
static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
{
CPUState *cs;
ICPState *ss;
KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics);
int ret;
cs = CPU(cpu);
ss = &xics->ss[cs->cpu_index];
assert(cs->cpu_index < xics->nr_servers);
if (xicskvm->kernel_xics_fd == -1) {
abort();
}
/*
* If we are reusing a parked vCPU fd corresponding to the CPU
* which was hot-removed earlier, we don't have to re-enable
* KVM_CAP_IRQ_XICS capability again.
*/
if (ss->cap_irq_xics_enabled) {
return;
}
ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd,
kvm_arch_vcpu_id(cs));
if (ret < 0) {
error_report("Unable to connect CPU%ld to kernel XICS: %s",
kvm_arch_vcpu_id(cs), strerror(errno));
exit(1);
}
ss->cap_irq_xics_enabled = true;
}
static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
Error **errp)
{
ICSState *ics = QLIST_FIRST(&xics->ics);
/* This needs to be deprecated ... */
xics->nr_irqs = nr_irqs;
if (ics) {
ics->nr_irqs = nr_irqs;
}
}
static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
Error **errp)
{
xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp);
}
static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
@@ -385,13 +357,9 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
__func__);
}
static void xics_kvm_realize(DeviceState *dev, Error **errp)
int xics_kvm_init(sPAPRMachineState *spapr, Error **errp)
{
KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev);
XICSState *xics = XICS_COMMON(dev);
ICSState *ics;
int i, rc;
Error *error = NULL;
int rc;
struct kvm_create_device xics_create_device = {
.type = KVM_DEV_TYPE_XICS,
.flags = 0,
@@ -439,72 +407,24 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
goto fail;
}
xicskvm->kernel_xics_fd = xics_create_device.fd;
QLIST_FOREACH(ics, &xics->ics, list) {
object_property_set_bool(OBJECT(ics), true, "realized", &error);
if (error) {
error_propagate(errp, error);
goto fail;
}
}
assert(xics->nr_servers);
for (i = 0; i < xics->nr_servers; i++) {
object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
&error);
if (error) {
error_propagate(errp, error);
goto fail;
}
}
kernel_xics_fd = xics_create_device.fd;
kvm_kernel_irqchip = true;
kvm_msi_via_irqfd_allowed = true;
kvm_gsi_direct_mapping = true;
return;
return rc;
fail:
kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
return -1;
}
static void xics_kvm_initfn(Object *obj)
{
XICSState *xics = XICS_COMMON(obj);
ICSState *ics;
ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM));
object_property_add_child(obj, "ics", OBJECT(ics), NULL);
ics->xics = xics;
QLIST_INSERT_HEAD(&xics->ics, ics, list);
}
static void xics_kvm_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
XICSStateClass *xsc = XICS_COMMON_CLASS(oc);
dc->realize = xics_kvm_realize;
xsc->cpu_setup = xics_kvm_cpu_setup;
xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
xsc->set_nr_servers = xics_kvm_set_nr_servers;
}
static const TypeInfo xics_spapr_kvm_info = {
.name = TYPE_XICS_SPAPR_KVM,
.parent = TYPE_XICS_COMMON,
.instance_size = sizeof(KVMXICSState),
.class_init = xics_kvm_class_init,
.instance_init = xics_kvm_initfn,
};
static void xics_kvm_register_types(void)
{
type_register_static(&xics_spapr_kvm_info);
type_register_static(&ics_kvm_info);
type_register_static(&icp_kvm_info);
}


@@ -44,7 +44,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
ICPState *icp = &spapr->xics->ss[cs->cpu_index];
ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
target_ulong cppr = args[0];
icp_set_cppr(icp, cppr);
@@ -56,12 +56,13 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
target_ulong server = xics_get_cpu_index_by_dt_id(args[0]);
target_ulong mfrr = args[1];
ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), server);
if (server >= spapr->xics->nr_servers) {
if (!icp) {
return H_PARAMETER;
}
icp_set_mfrr(spapr->xics->ss + server, mfrr);
icp_set_mfrr(icp, mfrr);
return H_SUCCESS;
}
@@ -69,7 +70,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
ICPState *icp = &spapr->xics->ss[cs->cpu_index];
ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t xirr = icp_accept(icp);
args[0] = xirr;
@@ -80,7 +81,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
ICPState *icp = &spapr->xics->ss[cs->cpu_index];
ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t xirr = icp_accept(icp);
args[0] = xirr;
@@ -92,7 +93,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
ICPState *icp = &spapr->xics->ss[cs->cpu_index];
ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
target_ulong xirr = args[0];
icp_eoi(icp, xirr);
@@ -103,7 +104,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
ICPState *icp = &spapr->xics->ss[cs->cpu_index];
ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t mfrr;
uint32_t xirr = icp_ipoll(icp, &mfrr);
@@ -118,7 +119,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
ICSState *ics = spapr->ics;
uint32_t nr, srcno, server, priority;
if ((nargs != 3) || (nret != 1)) {
@@ -134,7 +135,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1));
priority = rtas_ld(args, 2);
if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers)
if (!ics_valid_irq(ics, nr) || !xics_icp_get(XICS_FABRIC(spapr), server)
|| (priority > 0xff)) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
@@ -151,7 +152,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 3)) {
@@ -181,7 +182,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 1)) {
@@ -212,7 +213,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 1)) {
@@ -239,36 +240,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
Error **errp)
int xics_spapr_init(sPAPRMachineState *spapr, Error **errp)
{
ICSState *ics = QLIST_FIRST(&xics->ics);
/* This needs to be deprecated ... */
xics->nr_irqs = nr_irqs;
if (ics) {
ics->nr_irqs = nr_irqs;
}
}
static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers,
Error **errp)
{
xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp);
}
static void xics_spapr_realize(DeviceState *dev, Error **errp)
{
XICSState *xics = XICS_SPAPR(dev);
ICSState *ics;
Error *error = NULL;
int i;
if (!xics->nr_servers) {
error_setg(errp, "Number of servers needs to be greater 0");
return;
}
/* Registration of global state belongs into realize */
spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
@@ -281,55 +254,9 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
spapr_register_hypercall(H_XIRR_X, h_xirr_x);
spapr_register_hypercall(H_EOI, h_eoi);
spapr_register_hypercall(H_IPOLL, h_ipoll);
QLIST_FOREACH(ics, &xics->ics, list) {
object_property_set_bool(OBJECT(ics), true, "realized", &error);
if (error) {
error_propagate(errp, error);
return;
}
}
for (i = 0; i < xics->nr_servers; i++) {
object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
&error);
if (error) {
error_propagate(errp, error);
return;
}
}
return 0;
}
static void xics_spapr_initfn(Object *obj)
{
XICSState *xics = XICS_SPAPR(obj);
ICSState *ics;
ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE));
object_property_add_child(obj, "ics", OBJECT(ics), NULL);
ics->xics = xics;
QLIST_INSERT_HEAD(&xics->ics, ics, list);
}
static void xics_spapr_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
dc->realize = xics_spapr_realize;
xsc->set_nr_irqs = xics_spapr_set_nr_irqs;
xsc->set_nr_servers = xics_spapr_set_nr_servers;
}
static const TypeInfo xics_spapr_info = {
.name = TYPE_XICS_SPAPR,
.parent = TYPE_XICS_COMMON,
.instance_size = sizeof(XICSState),
.class_size = sizeof(XICSStateClass),
.class_init = xics_spapr_class_init,
.instance_init = xics_spapr_initfn,
};
#define ICS_IRQ_FREE(ics, srcno) \
(!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
@@ -354,9 +281,8 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
return -1;
}
int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
{
ICSState *ics = QLIST_FIRST(&xics->ics);
int irq;
if (!ics) {
@@ -387,10 +313,9 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
* Allocate block of consecutive IRQs, and return the number of the first IRQ in
* the block. If align==true, aligns the first IRQ number to num.
*/
int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align,
Error **errp)
int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
bool align, Error **errp)
{
ICSState *ics = QLIST_FIRST(&xics->ics);
int i, first = -1;
if (!ics) {
@@ -440,20 +365,18 @@ static void ics_free(ICSState *ics, int srcno, int num)
}
}
void xics_spapr_free(XICSState *xics, int irq, int num)
void spapr_ics_free(ICSState *ics, int irq, int num)
{
ICSState *ics = xics_find_source(xics, irq);
if (ics) {
if (ics_valid_irq(ics, irq)) {
trace_xics_ics_free(0, irq, num);
ics_free(ics, irq - ics->offset, num);
}
}
void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
{
uint32_t interrupt_server_ranges_prop[] = {
0, cpu_to_be32(xics->nr_servers),
0, cpu_to_be32(nr_servers),
};
int node;
@@ -470,10 +393,3 @@ void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
_FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
_FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
}
static void xics_spapr_register_types(void)
{
type_register_static(&xics_spapr_info);
}
type_init(xics_spapr_register_types)


@@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp)
{
sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);
int ret;
if (nvram->blk) {
nvram->size = blk_getlength(nvram->blk);
ret = blk_set_perm(nvram->blk,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
BLK_PERM_ALL, errp);
if (ret < 0) {
return;
}
} else {
nvram->size = DEFAULT_NVRAM_SIZE;
}


@@ -1530,6 +1530,34 @@ static const pci_class_desc pci_class_descriptions[] =
{ 0, NULL}
};
static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
void (*fn)(PCIBus *b,
PCIDevice *d,
void *opaque),
void *opaque)
{
PCIDevice *d;
int devfn;
for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn];
if (d) {
fn(bus, d, opaque);
}
}
}
void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
void *opaque)
{
bus = pci_find_bus_nr(bus, bus_num);
if (bus) {
pci_for_each_device_under_bus_reverse(bus, fn, opaque);
}
}
static void pci_for_each_device_under_bus(PCIBus *bus,
void (*fn)(PCIBus *b, PCIDevice *d,
void *opaque),


@@ -63,6 +63,7 @@
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/nmi.h"
#include "hw/intc/intc.h"
#include "hw/compat.h"
#include "qemu/cutils.h"
@@ -95,37 +96,68 @@
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
static XICSState *try_create_xics(const char *type, int nr_servers,
int nr_irqs, Error **errp)
static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
const char *type_icp, int nr_servers,
int nr_irqs, Error **errp)
{
Error *err = NULL;
DeviceState *dev;
XICSFabric *xi = XICS_FABRIC(spapr);
Error *err = NULL, *local_err = NULL;
ICSState *ics = NULL;
int i;
dev = qdev_create(NULL, type);
qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
object_property_set_bool(OBJECT(dev), true, "realized", &err);
ics = ICS_SIMPLE(object_new(type_ics));
qdev_set_parent_bus(DEVICE(ics), sysbus_get_default());
object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL);
object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err);
object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL);
object_property_set_bool(OBJECT(ics), true, "realized", &local_err);
error_propagate(&err, local_err);
if (err) {
error_propagate(errp, err);
object_unparent(OBJECT(dev));
return NULL;
goto error;
}
return XICS_COMMON(dev);
spapr->icps = g_malloc0(nr_servers * sizeof(ICPState));
spapr->nr_servers = nr_servers;
for (i = 0; i < nr_servers; i++) {
ICPState *icp = &spapr->icps[i];
object_initialize(icp, sizeof(*icp), type_icp);
qdev_set_parent_bus(DEVICE(icp), sysbus_get_default());
object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL);
object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL);
object_property_set_bool(OBJECT(icp), true, "realized", &err);
if (err) {
goto error;
}
object_unref(OBJECT(icp));
}
spapr->ics = ics;
return 0;
error:
error_propagate(errp, err);
if (ics) {
object_unparent(OBJECT(ics));
}
return -1;
}
static XICSState *xics_system_init(MachineState *machine,
int nr_servers, int nr_irqs, Error **errp)
static int xics_system_init(MachineState *machine,
int nr_servers, int nr_irqs, Error **errp)
{
XICSState *xics = NULL;
int rc = -1;
if (kvm_enabled()) {
Error *err = NULL;
if (machine_kernel_irqchip_allowed(machine)) {
xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs,
&err);
if (machine_kernel_irqchip_allowed(machine) &&
!xics_kvm_init(SPAPR_MACHINE(machine), errp)) {
rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM,
TYPE_KVM_ICP, nr_servers, nr_irqs, &err);
}
if (machine_kernel_irqchip_required(machine) && !xics) {
if (machine_kernel_irqchip_required(machine) && rc < 0) {
error_reportf_err(err,
"kernel_irqchip requested but unavailable: ");
} else {
@@ -133,11 +165,13 @@ static XICSState *xics_system_init(MachineState *machine,
}
}
if (!xics) {
xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp);
if (rc < 0) {
xics_spapr_init(SPAPR_MACHINE(machine), errp);
rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE,
TYPE_ICP, nr_servers, nr_irqs, errp);
}
return xics;
return rc;
}
static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
@@ -924,7 +958,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
_FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
/* /interrupt controller */
spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP);
spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP);
ret = spapr_populate_memory(spapr, fdt);
if (ret < 0) {
@@ -1053,6 +1087,62 @@ static void close_htab_fd(sPAPRMachineState *spapr)
spapr->htab_fd = -1;
}
static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1;
}
static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
hwaddr ptex, int n)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
if (!spapr->htab) {
/*
* HTAB is controlled by KVM. Fetch into temporary buffer
*/
ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64);
kvmppc_read_hptes(hptes, ptex, n);
return hptes;
}
/*
* HTAB is controlled by QEMU. Just point to the internally
* accessible PTEG.
*/
return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset);
}
static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
const ppc_hash_pte64_t *hptes,
hwaddr ptex, int n)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
if (!spapr->htab) {
g_free((void *)hptes);
}
/* Nothing to do for qemu managed HPT */
}
static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
uint64_t pte0, uint64_t pte1)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
hwaddr offset = ptex * HASH_PTE_SIZE_64;
if (!spapr->htab) {
kvmppc_write_hpte(ptex, pte0, pte1);
} else {
stq_p(spapr->htab + offset, pte0);
stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
}
}
static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
{
int shift;
@@ -1252,6 +1342,13 @@ static int spapr_post_load(void *opaque, int version_id)
sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
int err = 0;
if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
int i;
for (i = 0; i < spapr->nr_servers; i++) {
icp_resend(&spapr->icps[i]);
}
}
/* In earlier versions, there was no separate qdev for the PAPR
* RTC, so the RTC offset was stored directly in sPAPREnvironment.
* So when migrating from those versions, poke the incoming offset
@@ -1902,9 +1999,8 @@ static void ppc_spapr_init(MachineState *machine)
load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
/* Set up Interrupt Controller before we create the VCPUs */
spapr->xics = xics_system_init(machine,
DIV_ROUND_UP(max_cpus * smt, smp_threads),
XICS_IRQS_SPAPR, &error_fatal);
xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads),
XICS_IRQS_SPAPR, &error_fatal);
/* Set up containers for ibm,client-set-architecture negotiated options */
spapr->ov5 = spapr_ovec_new();
@@ -2872,6 +2968,40 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
*mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
}
static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
}
static void spapr_ics_resend(XICSFabric *dev)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
ics_resend(spapr->ics);
}
static ICPState *spapr_icp_get(XICSFabric *xi, int server)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL;
}
static void spapr_pic_print_info(InterruptStatsProvider *obj,
Monitor *mon)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
int i;
for (i = 0; i < spapr->nr_servers; i++) {
icp_pic_print_info(&spapr->icps[i], mon);
}
ics_pic_print_info(spapr->ics, mon);
}
static void spapr_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -2880,6 +3010,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
NMIClass *nc = NMI_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
mc->desc = "pSeries Logical Partition (PAPR compliant)";
@@ -2891,7 +3023,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->init = ppc_spapr_init;
mc->reset = ppc_spapr_reset;
mc->block_default_type = IF_SCSI;
mc->max_cpus = 255;
mc->max_cpus = 1024;
mc->no_parallel = 1;
mc->default_boot_order = "";
mc->default_ram_size = 512 * M_BYTE;
@@ -2913,6 +3045,14 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
nc->nmi_monitor_handler = spapr_nmi;
smc->phb_placement = spapr_phb_placement;
vhc->hypercall = emulate_spapr_hypercall;
vhc->hpt_mask = spapr_hpt_mask;
vhc->map_hptes = spapr_map_hptes;
vhc->unmap_hptes = spapr_unmap_hptes;
vhc->store_hpte = spapr_store_hpte;
xic->ics_get = spapr_ics_get;
xic->ics_resend = spapr_ics_resend;
xic->icp_get = spapr_icp_get;
ispc->print_info = spapr_pic_print_info;
}
static const TypeInfo spapr_machine_info = {
@@ -2929,6 +3069,8 @@ static const TypeInfo spapr_machine_info = {
{ TYPE_NMI },
{ TYPE_HOTPLUG_HANDLER },
{ TYPE_PPC_VIRTUAL_HYPERVISOR },
{ TYPE_XICS_FABRIC },
{ TYPE_INTERRUPT_STATS_PROVIDER },
{ }
},
};

View File

@@ -13,10 +13,12 @@
#include "hw/boards.h"
#include "qapi/error.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "target/ppc/kvm_ppc.h"
#include "hw/ppc/ppc.h"
#include "target/ppc/mmu-hash64.h"
#include "sysemu/numa.h"
#include "qemu/error-report.h"
static void spapr_cpu_reset(void *opaque)
{
@@ -34,15 +36,26 @@ static void spapr_cpu_reset(void *opaque)
env->spr[SPR_HIOR] = 0;
ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
&error_fatal);
/*
* This is a hack for the benefit of KVM PR - it abuses the SDR1
* slot in kvm_sregs to communicate the userspace address of the
* HPT
*/
if (kvm_enabled()) {
env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab
| (spapr->htab_shift - 18);
if (kvmppc_put_books_sregs(cpu) < 0) {
error_report("Unable to update SDR1 in KVM");
exit(1);
}
}
}
static void spapr_cpu_destroy(PowerPCCPU *cpu)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
xics_cpu_destroy(spapr->xics, cpu);
xics_cpu_destroy(XICS_FABRIC(spapr), cpu);
qemu_unregister_reset(spapr_cpu_reset, cpu);
}
@@ -57,8 +70,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
/* Enable PAPR mode in TCG or KVM */
cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
cpu_ppc_set_papr(cpu);
cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
if (cpu->max_compat) {
Error *local_err = NULL;
@@ -76,7 +88,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
cs->numa_node = i;
}
xics_cpu_setup(spapr->xics, cpu);
xics_cpu_setup(XICS_FABRIC(spapr), cpu);
qemu_register_reset(spapr_cpu_reset, cpu);
spapr_cpu_reset(cpu);


@@ -481,7 +481,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
qemu_irq_pulse(xics_get_qirq(spapr->xics,
qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
RTAS_LOG_TYPE_EPOW)));
}
@@ -574,7 +574,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
qemu_irq_pulse(xics_get_qirq(spapr->xics,
qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
RTAS_LOG_TYPE_HOTPLUG)));
}
@@ -695,7 +695,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
spapr_event_sources_get_source(spapr->event_sources, i);
g_assert(source->enabled);
qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq));
qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq));
}
}
@@ -752,7 +752,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
spapr->event_sources = spapr_event_sources_new();
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
xics_spapr_alloc(spapr->xics, 0, false,
spapr_ics_alloc(spapr->ics, 0, false,
&error_fatal));
/* NOTE: if machine supports modern/dedicated hotplug event source,
@@ -765,7 +765,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
*/
if (spapr->use_hotplug_event_source) {
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
xics_spapr_alloc(spapr->xics, 0, false,
spapr_ics_alloc(spapr->ics, 0, false,
&error_fatal));
}


@@ -47,12 +47,12 @@ static bool has_spr(PowerPCCPU *cpu, int spr)
return cpu->env.spr_cb[spr].name != NULL;
}
static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index)
static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
{
/*
* hash value/pteg group index is normalized by htab_mask
* hash value/pteg group index is normalized by HPT mask
*/
if (((pte_index & ~7ULL) / HPTES_PER_GROUP) & ~env->htab_mask) {
if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
return false;
}
return true;
@@ -77,15 +77,14 @@ static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr)
static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
target_ulong pte_index = args[1];
target_ulong ptex = args[1];
target_ulong pteh = args[2];
target_ulong ptel = args[3];
unsigned apshift;
target_ulong raddr;
target_ulong index;
uint64_t token;
target_ulong slot;
const ppc_hash_pte64_t *hptes;
apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
if (!apshift) {
@@ -116,36 +115,36 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
pteh &= ~0x60ULL;
if (!valid_pte_index(env, pte_index)) {
if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
index = 0;
slot = ptex & 7ULL;
ptex = ptex & ~7ULL;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7ULL;
token = ppc_hash64_start_access(cpu, pte_index);
for (; index < 8; index++) {
if (!(ppc_hash64_load_hpte0(cpu, token, index) & HPTE64_V_VALID)) {
hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
for (slot = 0; slot < 8; slot++) {
if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
break;
}
}
ppc_hash64_stop_access(cpu, token);
if (index == 8) {
ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
if (slot == 8) {
return H_PTEG_FULL;
}
} else {
token = ppc_hash64_start_access(cpu, pte_index);
if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
ppc_hash64_stop_access(cpu, token);
hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
return H_PTEG_FULL;
}
ppc_hash64_stop_access(cpu, token);
ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
}
ppc_hash64_store_hpte(cpu, pte_index + index,
pteh | HPTE64_V_HPTE_DIRTY, ptel);
ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
args[0] = pte_index + index;
args[0] = ptex + slot;
return H_SUCCESS;
}
@@ -161,18 +160,17 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
target_ulong flags,
target_ulong *vp, target_ulong *rp)
{
CPUPPCState *env = &cpu->env;
uint64_t token;
const ppc_hash_pte64_t *hptes;
target_ulong v, r;
if (!valid_pte_index(env, ptex)) {
if (!valid_ptex(cpu, ptex)) {
return REMOVE_PARM;
}
token = ppc_hash64_start_access(cpu, ptex);
v = ppc_hash64_load_hpte0(cpu, token, 0);
r = ppc_hash64_load_hpte1(cpu, token, 0);
ppc_hash64_stop_access(cpu, token);
hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
v = ppc_hash64_hpte0(cpu, hptes, 0);
r = ppc_hash64_hpte1(cpu, hptes, 0);
ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -191,11 +189,11 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
target_ulong pte_index = args[1];
target_ulong ptex = args[1];
target_ulong avpn = args[2];
RemoveResult ret;
ret = remove_hpte(cpu, pte_index, avpn, flags,
ret = remove_hpte(cpu, ptex, avpn, flags,
&args[0], &args[1]);
switch (ret) {
@@ -291,19 +289,19 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
target_ulong pte_index = args[1];
target_ulong ptex = args[1];
target_ulong avpn = args[2];
uint64_t token;
const ppc_hash_pte64_t *hptes;
target_ulong v, r;
if (!valid_pte_index(env, pte_index)) {
if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
token = ppc_hash64_start_access(cpu, pte_index);
v = ppc_hash64_load_hpte0(cpu, token, 0);
r = ppc_hash64_load_hpte1(cpu, token, 0);
ppc_hash64_stop_access(cpu, token);
hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
v = ppc_hash64_hpte0(cpu, hptes, 0);
r = ppc_hash64_hpte1(cpu, hptes, 0);
ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
@@ -315,36 +313,35 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
r |= (flags << 55) & HPTE64_R_PP0;
r |= (flags << 48) & HPTE64_R_KEY_HI;
r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
ppc_hash64_store_hpte(cpu, pte_index,
ppc_hash64_store_hpte(cpu, ptex,
(v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
/* Flush the tlb */
check_tlb_flush(env, true);
/* Don't need a memory barrier, due to qemu's global lock */
ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
return H_SUCCESS;
}
static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
target_ulong pte_index = args[1];
target_ulong ptex = args[1];
uint8_t *hpte;
int i, ridx, n_entries = 1;
if (!valid_pte_index(env, pte_index)) {
if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
if (flags & H_READ_4) {
/* Clear the two low order bits */
pte_index &= ~(3ULL);
ptex &= ~(3ULL);
n_entries = 4;
}
hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64);
for (i = 0, ridx = 0; i < n_entries; i++) {
args[ridx++] = ldq_p(hpte);


@@ -43,6 +43,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_ids.h"
#include "hw/ppc/spapr_drc.h"
#include "sysemu/device_tree.h"
#include "sysemu/kvm.h"
@@ -325,7 +326,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
return;
}
xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
if (msi_present(pdev)) {
spapr_msi_setmsg(pdev, 0, false, 0, 0);
}
@@ -363,7 +364,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
}
/* Allocate MSIs */
irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
irq = spapr_ics_alloc_block(spapr->ics, req_num, false,
ret_intr_type == RTAS_TYPE_MSI, &err);
if (err) {
error_reportf_err(err, "Can't allocate MSIs for device %x: ",
@@ -374,7 +375,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
/* Release previous MSIs */
if (msi) {
xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
g_hash_table_remove(phb->msi, &config_addr);
}
@@ -736,7 +737,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
trace_spapr_pci_msi_write(addr, data, irq);
qemu_irq_pulse(xics_get_qirq(spapr->xics, irq));
qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq));
}
static const MemoryRegionOps spapr_msi_ops = {
@@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
rp->assigned_len = assigned_idx * sizeof(ResourceFields);
}
typedef struct PCIClass PCIClass;
typedef struct PCISubClass PCISubClass;
typedef struct PCIIFace PCIIFace;
struct PCIIFace {
int iface;
const char *name;
};
struct PCISubClass {
int subclass;
const char *name;
const PCIIFace *iface;
};
struct PCIClass {
const char *name;
const PCISubClass *subc;
};
static const PCISubClass undef_subclass[] = {
{ PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass mass_subclass[] = {
{ PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
{ PCI_CLASS_STORAGE_IDE, "ide", NULL },
{ PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
{ PCI_CLASS_STORAGE_IPI, "ipi", NULL },
{ PCI_CLASS_STORAGE_RAID, "raid", NULL },
{ PCI_CLASS_STORAGE_ATA, "ata", NULL },
{ PCI_CLASS_STORAGE_SATA, "sata", NULL },
{ PCI_CLASS_STORAGE_SAS, "sas", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass net_subclass[] = {
{ PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
{ PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
{ PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
{ PCI_CLASS_NETWORK_ATM, "atm", NULL },
{ PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
{ PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
{ PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass displ_subclass[] = {
{ PCI_CLASS_DISPLAY_VGA, "vga", NULL },
{ PCI_CLASS_DISPLAY_XGA, "xga", NULL },
{ PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass media_subclass[] = {
{ PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
{ PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
{ PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass mem_subclass[] = {
{ PCI_CLASS_MEMORY_RAM, "memory", NULL },
{ PCI_CLASS_MEMORY_FLASH, "flash", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass bridg_subclass[] = {
{ PCI_CLASS_BRIDGE_HOST, "host", NULL },
{ PCI_CLASS_BRIDGE_ISA, "isa", NULL },
{ PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
{ PCI_CLASS_BRIDGE_MC, "mca", NULL },
{ PCI_CLASS_BRIDGE_PCI, "pci", NULL },
{ PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
{ PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
{ PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
{ PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
{ PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
{ PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass comm_subclass[] = {
{ PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
{ PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
{ PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
{ PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
{ PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
{ PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
{ 0xFF, NULL, NULL, },
};
static const PCIIFace pic_iface[] = {
{ PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
{ PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
{ 0xFF, NULL },
};
static const PCISubClass sys_subclass[] = {
{ PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
{ PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
{ PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
{ PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
{ PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
{ PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass inp_subclass[] = {
{ PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
{ PCI_CLASS_INPUT_PEN, "pen", NULL },
{ PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
{ PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
{ PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass dock_subclass[] = {
{ PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass cpu_subclass[] = {
{ PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
{ PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
{ PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
{ PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
{ 0xFF, NULL, NULL },
};
static const PCIIFace usb_iface[] = {
{ PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
{ PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
{ PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
{ PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
{ PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
{ PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
{ 0xFF, NULL },
};
static const PCISubClass ser_subclass[] = {
{ PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
{ PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
{ PCI_CLASS_SERIAL_SSA, "ssa", NULL },
{ PCI_CLASS_SERIAL_USB, "usb", usb_iface },
{ PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
{ PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
{ PCI_CLASS_SERIAL_IB, "infiniband", NULL },
{ PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
{ PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
{ PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass wrl_subclass[] = {
{ PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
{ PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
{ PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
{ PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
{ PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass sat_subclass[] = {
{ PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
{ PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
{ PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
{ PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass crypt_subclass[] = {
{ PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
{ PCI_CLASS_CRYPT_ENTERTAINMENT,
"entertainment-encryption", NULL },
{ 0xFF, NULL, NULL },
};
static const PCISubClass spc_subclass[] = {
{ PCI_CLASS_SP_DPIO, "dpio", NULL },
{ PCI_CLASS_SP_PERF, "counter", NULL },
{ PCI_CLASS_SP_SYNCH, "measurement", NULL },
{ PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
{ 0xFF, NULL, NULL },
};
static const PCIClass pci_classes[] = {
{ "legacy-device", undef_subclass },
{ "mass-storage", mass_subclass },
{ "network", net_subclass },
{ "display", displ_subclass, },
{ "multimedia-device", media_subclass },
{ "memory-controller", mem_subclass },
{ "unknown-bridge", bridg_subclass },
{ "communication-controller", comm_subclass},
{ "system-peripheral", sys_subclass },
{ "input-controller", inp_subclass },
{ "docking-station", dock_subclass },
{ "cpu", cpu_subclass },
{ "serial-bus", ser_subclass },
{ "wireless-controller", wrl_subclass },
{ "intelligent-io", NULL },
{ "satellite-device", sat_subclass },
{ "encryption", crypt_subclass },
{ "data-processing-controller", spc_subclass },
};
static const char *pci_find_device_name(uint8_t class, uint8_t subclass,
uint8_t iface)
{
const PCIClass *pclass;
const PCISubClass *psubclass;
const PCIIFace *piface;
const char *name;
if (class >= ARRAY_SIZE(pci_classes)) {
return "pci";
}
pclass = pci_classes + class;
name = pclass->name;
if (pclass->subc == NULL) {
return name;
}
psubclass = pclass->subc;
while ((psubclass->subclass & 0xff) != 0xff) {
if ((psubclass->subclass & 0xff) == subclass) {
name = psubclass->name;
break;
}
psubclass++;
}
piface = psubclass->iface;
if (piface == NULL) {
return name;
}
while ((piface->iface & 0xff) != 0xff) {
if ((piface->iface & 0xff) == iface) {
name = piface->name;
break;
}
piface++;
}
return name;
}
static void pci_get_node_name(char *nodename, int len, PCIDevice *dev)
{
int slot = PCI_SLOT(dev->devfn);
int func = PCI_FUNC(dev->devfn);
uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
const char *name;
name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
ccode & 0xff);
if (func != 0) {
snprintf(nodename, len, "%s@%x,%x", name, slot, func);
} else {
snprintf(nodename, len, "%s@%x", name, slot);
}
}
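/* Illustration (hypothetical values, not part of the patch): with these
 * helpers, an ethernet adapter at slot 5, function 0 (class code 0x020000)
 * is named "ethernet@5" instead of the previous generic "pci@5"; the same
 * adapter at function 1 would be "ethernet@5,1", and a class code outside
 * the pci_classes table falls back to plain "pci". */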
static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
PCIDevice *pdev);
@@ -957,6 +1226,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
int pci_status, err;
char *buf = NULL;
uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
uint32_t max_msi, max_msix;
if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
@@ -971,8 +1241,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
_FDT(fdt_setprop_cell(fdt, offset, "revision-id",
pci_default_read_config(dev, PCI_REVISION_ID, 1)));
_FDT(fdt_setprop_cell(fdt, offset, "class-code",
pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
_FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
_FDT(fdt_setprop_cell(fdt, offset, "interrupts",
pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
@@ -1013,11 +1282,10 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
_FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
}
/* NOTE: this is normally generated by firmware via path/unit name,
* but in our case we must set it manually since it does not get
* processed by OF beforehand
*/
_FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
_FDT(fdt_setprop_string(fdt, offset, "name",
pci_find_device_name((ccode >> 16) & 0xff,
(ccode >> 8) & 0xff,
ccode & 0xff)));
buf = spapr_phb_get_loc_code(sphb, dev);
if (!buf) {
error_report("Failed setting the ibm,loc-code");
@@ -1061,15 +1329,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
void *fdt, int node_offset)
{
int offset, ret;
- int slot = PCI_SLOT(dev->devfn);
- int func = PCI_FUNC(dev->devfn);
char nodename[FDT_NAME_MAX];
- if (func != 0) {
- snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
- } else {
- snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
- }
+ pci_get_node_name(nodename, FDT_NAME_MAX, dev);
offset = fdt_add_subnode(fdt, node_offset, nodename);
ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
@@ -1485,7 +1747,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
uint32_t irq;
Error *local_err = NULL;
- irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err);
+ irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_prepend(errp, "can't allocate LSIs: ");
@@ -1782,9 +2044,9 @@ static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
s_fdt.fdt = p->fdt;
s_fdt.node_off = offset;
s_fdt.sphb = p->sphb;
- pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
- spapr_populate_pci_devices_dt,
- &s_fdt);
+ pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
}
static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
@@ -1953,9 +2215,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
s_fdt.fdt = fdt;
s_fdt.node_off = bus_off;
s_fdt.sphb = phb;
- pci_for_each_device(bus, pci_bus_num(bus),
- spapr_populate_pci_devices_dt,
- &s_fdt);
+ pci_for_each_device_reverse(bus, pci_bus_num(bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
SPAPR_DR_CONNECTOR_TYPE_PCI);


@@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
dev->qdev.id = id;
}
- dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err);
+ dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;


@@ -20,6 +20,7 @@
#include "hw/s390x/virtio-ccw.h"
#include "hw/s390x/css.h"
#include "ipl.h"
#include "qemu/error-report.h"
#define KERN_IMAGE_START 0x010000UL
#define KERN_PARM_AREA 0x010480UL
@@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = {
DEFINE_PROP_STRING("initrd", S390IPLState, initrd),
DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline),
DEFINE_PROP_STRING("firmware", S390IPLState, firmware),
DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw),
DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false),
DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration,
true),
@@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
TYPE_VIRTIO_CCW_DEVICE);
SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st),
TYPE_SCSI_DEVICE);
VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st),
TYPE_VIRTIO_NET);
if (vn) {
ipl->netboot = true;
}
if (virtio_ccw_dev) {
CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev);
@@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
return false;
}
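/* Load the network boot firmware named by the "netboot_fw" property into
 * guest RAM: try it as an s390 ELF image first, then fall back to loading
 * it as a raw image at KERN_IMAGE_START.  Returns the image size, or a
 * negative value with *errp set on failure. */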
static int load_netboot_image(Error **errp)
{
S390IPLState *ipl = get_ipl_device();
char *netboot_filename;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *mr = NULL;
void *ram_ptr = NULL;
int img_size = -1;
mr = memory_region_find(sysmem, 0, 1).mr;
if (!mr) {
error_setg(errp, "Failed to find memory region at address 0");
return -1;
}
ram_ptr = memory_region_get_ram_ptr(mr);
if (!ram_ptr) {
error_setg(errp, "No RAM found");
goto unref_mr;
}
netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw);
if (netboot_filename == NULL) {
error_setg(errp, "Could not find network bootloader");
goto unref_mr;
}
img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr,
NULL, NULL, 1, EM_S390, 0, 0, NULL, false);
if (img_size < 0) {
img_size = load_image_size(netboot_filename, ram_ptr, ram_size);
ipl->start_addr = KERN_IMAGE_START;
}
if (img_size < 0) {
error_setg(errp, "Failed to load network bootloader");
}
g_free(netboot_filename);
unref_mr:
memory_region_unref(mr);
return img_size;
}
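/* Walk every (cssid, ssid, schid) subchannel looking for the device number
 * named in a CCW-type IPL block, and report whether that device's control
 * unit model identifies it as a virtio-net device. */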
static bool is_virtio_net_device(IplParameterBlock *iplb)
{
uint8_t cssid;
uint8_t ssid;
uint16_t devno;
uint16_t schid;
SubchDev *sch = NULL;
if (iplb->pbt != S390_IPL_TYPE_CCW) {
return false;
}
devno = be16_to_cpu(iplb->ccw.devno);
ssid = iplb->ccw.ssid & 3;
for (schid = 0; schid < MAX_SCHID; schid++) {
for (cssid = 0; cssid < MAX_CSSID; cssid++) {
sch = css_find_subch(1, cssid, ssid, schid);
if (sch && sch->devno == devno) {
return sch->id.cu_model == VIRTIO_ID_NET;
}
}
}
return false;
}
void s390_ipl_update_diag308(IplParameterBlock *iplb)
{
S390IPLState *ipl = get_ipl_device();
ipl->iplb = *iplb;
ipl->iplb_valid = true;
ipl->netboot = is_virtio_net_device(iplb);
}
IplParameterBlock *s390_ipl_get_iplb(void)
@@ -287,6 +369,7 @@ void s390_reipl_request(void)
void s390_ipl_prepare_cpu(S390CPU *cpu)
{
S390IPLState *ipl = get_ipl_device();
Error *err = NULL;
cpu->env.psw.addr = ipl->start_addr;
cpu->env.psw.mask = IPL_PSW_MASK;
@@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu)
ipl->iplb_valid = s390_gen_initial_iplb(ipl);
}
}
if (ipl->netboot) {
if (load_netboot_image(&err) < 0) {
error_report_err(err);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
ipl->iplb.ccw.netboot_start_addr = ipl->start_addr;
}
}
static void s390_ipl_reset(DeviceState *dev)


@@ -16,7 +16,8 @@
#include "cpu.h"
struct IplBlockCcw {
- uint8_t reserved0[85];
+ uint64_t netboot_start_addr;
+ uint8_t reserved0[77];
uint8_t ssid;
uint16_t devno;
uint8_t vm_flags;
@@ -100,12 +101,14 @@ struct S390IPLState {
IplParameterBlock iplb;
bool iplb_valid;
bool reipl_requested;
bool netboot;
/*< public >*/
char *kernel;
char *initrd;
char *cmdline;
char *firmware;
char *netboot_fw;
uint8_t cssid;
uint8_t ssid;
uint16_t devno;


@@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine)
/* get a BUS */
css_bus = virtual_css_bus_init();
s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline,
machine->initrd_filename, "s390-ccw.img", true);
machine->initrd_filename, "s390-ccw.img",
"s390-netboot.img", true);
s390_flic_init();
dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);


@@ -65,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename,
const char *firmware,
const char *netboot_fw,
bool enforce_bios)
{
Object *new = object_new(TYPE_S390_IPL);
@@ -78,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
}
qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
qdev_prop_set_string(dev, "firmware", firmware);
qdev_prop_set_string(dev, "netboot_fw", netboot_fw);
qdev_prop_set_bit(dev, "enforce_bios", enforce_bios);
object_property_add_child(qdev_get_machine(), TYPE_S390_IPL,
new, NULL);


@@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename,
const char *firmware,
const char *netboot_fw,
bool enforce_bios);
void s390_create_virtio_net(BusState *bus, const char *name);
void s390_nmi(NMIState *n, int cpu_index, Error **errp);


@@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque)
}
}
- static void scsi_cd_change_media_cb(void *opaque, bool load)
+ static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
{
SCSIDiskState *s = opaque;
@@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
return;
}
}
- blkconf_apply_backend_options(&dev->conf);
+ blkconf_apply_backend_options(&dev->conf,
+ blk_is_read_only(s->qdev.conf.blk),
+ dev->type == TYPE_DISK, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
if (s->qdev.conf.discard_granularity == -1) {
s->qdev.conf.discard_granularity =
@@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
if (!dev->conf.blk) {
- dev->conf.blk = blk_new();
+ dev->conf.blk = blk_new(0, BLK_PERM_ALL);
}
s->qdev.blocksize = 2048;


@@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly)
}
}
void sdbus_reparent_card(SDBus *from, SDBus *to)
{
SDState *card = get_card(from);
SDCardClass *sc;
bool readonly;
/* We directly reparent the card object rather than implementing this
* as a hotpluggable connection because we don't want to expose SD cards
* to users as being hotpluggable, and we can get away with it in this
* limited use case. This could perhaps be implemented more cleanly in
* future by adding support to the hotplug infrastructure for "device
* can be hotplugged only via code, not by user".
*/
if (!card) {
return;
}
sc = SD_CARD_GET_CLASS(card);
readonly = sc->get_readonly(card);
sdbus_set_inserted(from, false);
qdev_set_parent_bus(DEVICE(card), &to->qbus);
sdbus_set_inserted(to, true);
sdbus_set_readonly(to, readonly);
}
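/* Hypothetical usage sketch (not part of this change): a board model with
 * two SD controllers whose pins can be re-muxed at runtime might move the
 * inserted card between them with something like
 *     sdbus_reparent_card(&old_controller->sdbus, &new_controller->sdbus);
 * which also carries the card's read-only state over to the destination
 * bus. */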
static const TypeInfo sd_bus_info = {
.name = TYPE_SD_BUS,
.parent = TYPE_BUS,


@@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd)
return sd->wp_switch;
}
- static void sd_cardchange(void *opaque, bool load)
+ static void sd_cardchange(void *opaque, bool load, Error **errp)
{
SDState *sd = opaque;
DeviceState *dev = DEVICE(sd);
@@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj)
static void sd_realize(DeviceState *dev, Error **errp)
{
SDState *sd = SD_CARD(dev);
int ret;
if (sd->blk && blk_is_read_only(sd->blk)) {
error_setg(errp, "Cannot use read-only drive as SD card");
@@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp)
}
if (sd->blk) {
ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
BLK_PERM_ALL, errp);
if (ret < 0) {
return;
}
blk_set_dev_ops(sd->blk, &sd_block_ops, sd);
}
}


@@ -1,5 +1,6 @@
common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o
common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o
common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o
common-obj-$(CONFIG_CADENCE) += cadence_ttc.o
common-obj-$(CONFIG_DS1338) += ds1338.o
Some files were not shown because too many files have changed in this diff.