Compare commits

..

687 Commits

Author SHA1 Message Date
Marc-André Lureau
7bc4f0846f vnc: fix a qio-channel leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20170317092802.17973-1-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-20 09:07:34 +01:00
Gerd Hoffmann
f019722cbb cirrus: fix off-by-one in cirrus_bitblt_rop_bkwd_transp_*_16
The switch from pointers to addresses (commit
026aeffcb4 and
ffaf857778) added
a off-by-one bug to 16bit backward blits.  Fix.

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 1489735296-19047-1-git-send-email-kraxel@redhat.com
2017-03-17 10:23:44 +01:00
Alex Bennée
8bb93c6f99 ui/console: ensure graphic updates don't race with TCG vCPUs
Commit 8d04fb55..

  tcg: drop global lock during TCG code execution

..broke the assumption that updates to the GUI couldn't happen at the
same time as TCG vCPUs where running. As a result the TCG vCPU could
still be updating a directly mapped frame-buffer while the display
side was updating. This would cause artefacts to appear when the
update code assumed that memory block hadn't changed.

The simplest solution is to ensure the two things can't happen at the
same time like the old BQL locking scheme. Here we use the solution
introduced for MTTCG and schedule the update as async_safe_work when
we know no vCPUs can be running.

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20170315144825.3108-1-alex.bennee@linaro.org
Cc: BALATON Zoltan <balaton@eik.bme.hu>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

[ kraxel: updated comment clarifying the display adapters are buggy
          and this is a temporary workaround ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-17 10:17:21 +01:00
Peter Maydell
272d7dee59 Merge remote-tracking branch 'remotes/kraxel/tags/pull-cirrus-20170316-1' into staging
cirrus: blitter fixes.

# gpg: Signature made Thu 16 Mar 2017 09:05:22 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-cirrus-20170316-1:
  cirrus: stop passing around src pointers in the blitter
  cirrus: stop passing around dst pointers in the blitter
  cirrus: fix cirrus_invalidate_region
  cirrus: add option to disable blitter
  cirrus: switch to 4 MB video memory by default
  cirrus/vnc: zap bitblit support from console code.
  fix :cirrus_vga fix OOB read case qemu Segmentation fault

# Conflicts:
#	include/hw/compat.h

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 16:40:44 +00:00
Peter Maydell
c5e737e5fb Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170316' into staging
migration/next for 20170316

# gpg: Signature made Thu 16 Mar 2017 08:21:51 GMT
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170316:
  postcopy: Check for shared memory
  RAMBlocks: qemu_ram_is_shared
  vmstate: fix failed iotests case 68 and 91
  migration/block: Avoid invoking blk_drain too frequently
  migration: use "" as the default for tls-creds/hostname
  Change the method to calculate dirty-pages-rate

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 15:32:08 +00:00
Peter Maydell
094a9a7cd6 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
Pull request

Tracing makefile fixes for QEMU 2.9.

# gpg: Signature made Thu 16 Mar 2017 06:56:10 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace: ensure $(tracetool-y) is defined in top level makefile
  makefile: generate trace-events-all upfront
  makefile: merge GENERATED_HEADERS & GENERATED_SOURCES variables

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 14:23:10 +00:00
Peter Maydell
699f6c6fd4 dtc: Revert unintentional submodule downgrade from commit c2cabb3422
Commit c2cabb3422 inadvertently downgraded the 'dtc' submodule,
undoing the increments added in earlier commits. Revert this,
returning the submodule state to where we should be.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 14:11:15 +00:00
Peter Maydell
3c2758c286 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-03-16' into staging
QAPI patches for 2017-03-16

# gpg: Signature made Thu 16 Mar 2017 06:18:38 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-03-16: (49 commits)
  qapi: Fix a misleading parser error message
  qapi: Make pylint a bit happier
  qapi: Drop unused .check_clash() parameter schema
  qapi: union_types is a list used like a dict, make it one
  qapi: struct_types is a list used like a dict, make it one
  qapi: enum_types is a list used like a dict, make it one
  qapi: Factor add_name() calls out of the meta conditional
  qapi: Simplify what gets stored in enum_types
  qapi: Drop unused variable events
  qapi: Eliminate check_docs() and drop QAPIDoc.expr
  qapi: Fix detection of bogus member documentation
  tests/qapi-schema: Improve coverage of bogus member docs
  tests/qapi-schema: Rename doc-bad-args to doc-bad-command-arg
  qapi: Move empty doc section checking to doc parser
  qapi: Improve error message on @NAME: in free-form doc
  qapi: Move detection of doc / expression name mismatch
  qapi: Fix detection of doc / expression mismatch
  tests/qapi-schema: Improve doc / expression mismatch coverage
  qapi2texi: Use category "Object" for all object types
  qapi2texi: Generate descriptions for simple union tags
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 12:05:03 +00:00
Peter Maydell
3716fba3f5 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pci: fixes

More fixes missed in the previous pull request.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 16 Mar 2017 02:29:49 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  virtio-serial-bus: Delete timer from list before free it
  hw/virtio: fix Power Management Control Register for PCI Express virtio devices
  hw/virtio: fix Link Control Register for PCI Express virtio devices
  hw/virtio: fix error enabling flags in Device Control register
  hw/pcie: fix Extended Configuration Space for devices with no Extended Capabilities

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 11:05:47 +00:00
Peter Maydell
7c75638028 Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Thu 16 Mar 2017 00:52:41 GMT
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  ide: ahci: call cleanup function in ahci unit
  ide: core: add cleanup function
  ide: qdev: register ide bus unrealize function

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 10:04:41 +00:00
Dr. David Alan Gilbert
8679638b0e postcopy: Check for shared memory
Postcopy doesn't support migration of RAM shared with another process
yet (we've got a bunch of things to understand).
Check for the case and don't allow postcopy to be enabled.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-03-16 09:02:26 +01:00
Dr. David Alan Gilbert
463a4ac23b RAMBlocks: qemu_ram_is_shared
Provide a helper to say whether a RAMBlock was created as a
shared mapping.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-03-16 09:00:58 +01:00
QingFeng Hao
e1e686c1fa vmstate: fix failed iotests case 68 and 91
This problem affects s390x only if we are running without KVM.
Basically, S390CPU.irqstate is unused if we do not use KVM,
and thus no buffer is allocated.
This causes size=0, first_elem=NULL and n_elems=1 in
vmstate_load_state and vmstate_save_state. And the assert fails.
With this fix we can go back to the old behavior and support
VMS_VBUFFER with size 0 and nullptr.

Signed-off-by: QingFeng Hao <haoqf@linux.vnet.ibm.com>
Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-03-16 08:59:52 +01:00
Lidong Chen
1cf6aa74b3 migration/block: Avoid invoking blk_drain too frequently
Increase bmds->cur_dirty after submit io, so reduce the frequency
involve into blk_drain, and improve the performance obviously
when block migration.

The performance test result of this patch:

During the block dirty save phase, this patch improve guest os IOPS
from 4.0K to 9.5K. and improve the migration speed from
505856 rsec/s to 855756 rsec/s.

Signed-off-by: Lidong Chen <jemmy858585@gmail.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-03-16 08:58:27 +01:00
Gerd Hoffmann
ffaf857778 cirrus: stop passing around src pointers in the blitter
Does basically the same as "cirrus: stop passing around dst pointers in
the blitter", just for the src pointer instead of the dst pointer.

For the src we have to care about cputovideo blits though and fetch the
data from s->cirrus_bltbuf instead of vga memory.  The cirrus_src*()
helper functions handle that.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489584487-3489-1-git-send-email-kraxel@redhat.com
2017-03-16 08:58:16 +01:00
Gerd Hoffmann
026aeffcb4 cirrus: stop passing around dst pointers in the blitter
Instead pass around the address (aka offset into vga memory).  Calculate
the pointer in the rop_* functions, after applying the mask to the
address, to make sure the address stays within the valid range.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489574872-8679-1-git-send-email-kraxel@redhat.com
2017-03-16 08:58:15 +01:00
Gerd Hoffmann
e048dac616 cirrus: fix cirrus_invalidate_region
off_cur_end is exclusive, so off_cur_end == cirrus_addr_mask is valid.
Fix calculation to make sure to allow that, otherwise the assert added
by commit f153b563f8 can trigger for valid
blits.

Test case: boot windows nt 4.0

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489579606-26020-1-git-send-email-kraxel@redhat.com
2017-03-16 08:58:15 +01:00
Gerd Hoffmann
827bd51726 cirrus: add option to disable blitter
Ok, we have this beast in the cirrus code which is not used at all by
modern guests, except when you try to find security holes in qemu.  So,
add an option to disable blitter altogether.  Guests released within
the last ten years should not show any rendering issues if you turn off
blitter support.

There are no known bugs in the cirrus blitter code.  But in the past we
hoped a few times already that we've finally nailed the last issue.  So
having some easy way to mitigate in case yet another blitter issue shows
up certainly makes me sleep a bit better at night.

For completeness:  The by far better way to mitigate is to switch away
from cirrus and use stdvga instead.  Or something more modern like
virtio-vga in case your guest has support for it.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489494540-15745-1-git-send-email-kraxel@redhat.com
2017-03-16 08:58:15 +01:00
Gerd Hoffmann
73c148130b cirrus: switch to 4 MB video memory by default
Quoting cirrus source code:
   Follow real hardware, cirrus card emulated has 4 MB video memory.
   Also accept 8 MB/16 MB for backward compatibility.

So just use 4MB by default.  We decided to leave that at 8MB by default
a while ago, for live migration compatibility reasons.  But we have
compat properties to handle that, so that isn't a compeling reason.

This also removes some sanity check inconsistencies in the cirrus code.
Some places check against the allocated video memory, some places check
against the 4MB physical hardware has.  Guest code can trigger asserts
because of that.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489494514-15606-1-git-send-email-kraxel@redhat.com
2017-03-16 08:58:15 +01:00
Gerd Hoffmann
50628d3479 cirrus/vnc: zap bitblit support from console code.
There is a special code path (dpy_gfx_copy) to allow graphic emulation
notify user interface code about bitblit operations carryed out by
guests.  It is supported by cirrus and vnc server.  The intended purpose
is to optimize display scrolls and just send over the scroll op instead
of a full display update.

This is rarely used these days though because modern guests simply don't
use the cirrus blitter any more.  Any linux guest using the cirrus drm
driver doesn't.  Any windows guest newer than winxp doesn't ship with a
cirrus driver any more and thus uses the cirrus as simple framebuffer.

So this code tends to bitrot and bugs can go unnoticed for a long time.
See for example commit "3e10c3e vnc: fix qemu crash because of SIGSEGV"
which fixes a bug lingering in the code for almost a year, added by
commit "c7628bf vnc: only alloc server surface with clients connected".

Also the vnc server will throttle the frame rate in case it figures the
network can't keep up (send buffers are full).  This doesn't work with
dpy_gfx_copy, for any copy operation sent to the vnc client we have to
send all outstanding updates beforehand, otherwise the vnc client might
run the client side blit on outdated data and thereby corrupt the
display.  So this dpy_gfx_copy "optimization" might even make things
worse on slow network links.

Lets kill it once for all.

Oh, and one more reason: Turns out (after writing the patch) we have a
security bug in that code path ...

Fixes: CVE-2016-9603
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489494419-14340-1-git-send-email-kraxel@redhat.com
2017-03-16 08:58:15 +01:00
hangaohuai
215902d7b6 fix :cirrus_vga fix OOB read case qemu Segmentation fault
check the validity of parameters in cirrus_bitblt_rop_fwd_transp_xxx
and cirrus_bitblt_rop_fwd_xxx to avoid the OOB read which causes qemu Segmentation fault.

After the fix, we will touch the assert in
cirrus_invalidate_region:
assert(off_cur_end >= off_cur);

Signed-off-by: fangying <fangying1@huawei.com>
Signed-off-by: hangaohuai <hangaohuai@huawei.com>
Message-id: 20170314063919.16200-1-hangaohuai@huawei.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-16 08:58:15 +01:00
Daniel P. Berrange
4af245dc3e migration: use "" as the default for tls-creds/hostname
The tls-creds parameter has a default value of NULL indicating
that TLS should not be used. Setting it to non-NULL enables
use of TLS. Once tls-creds are set to a non-NULL value via the
monitor, it isn't possible to set them back to NULL again, due
to current implementation limitations. The empty string is not
a valid QObject identifier, so this switches to use "" as the
default, indicating that TLS will not be used

The tls-hostname parameter has a default value of NULL indicating
the the hostname from the migrate connection URI should be used.
Again, once tls-hostname is set non-NULL, to override the default
hostname for x509 cert validation, it isn't possible to reset it
back to NULL via the monitor. The empty string is not a valid
hostname, so this switches to use "" as the default, indicating
that the migrate URI hostname should be used.

Using "" as the default for both, also means that the monitor
commands "info migrate_parameters" / "query-migrate-parameters"
will report existance of tls-creds/tls-parameters even when set
to their default values.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>

Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-03-16 08:57:08 +01:00
Chao Fan
1ffb5dfd35 Change the method to calculate dirty-pages-rate
In function cpu_physical_memory_sync_dirty_bitmap, file
include/exec/ram_addr.h:

if (src[idx][offset]) {
    unsigned long bits = atomic_xchg(&src[idx][offset], 0);
    unsigned long new_dirty;
    new_dirty = ~dest[k];
    dest[k] |= bits;
    new_dirty &= bits;
    num_dirty += ctpopl(new_dirty);
}

After these codes executed, only the pages not dirtied in bitmap(dest),
but dirtied in dirty_memory[DIRTY_MEMORY_MIGRATION] will be calculated.
For example:
When ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] = 0b00001111,
and atomic_rcu_read(&migration_bitmap_rcu)->bmap = 0b00000011,
the new_dirty will be 0b00001100, and this function will return 2 but not
4 which is expected.
the dirty pages in dirty_memory[DIRTY_MEMORY_MIGRATION] are all new,
so these should be calculated also.

Signed-off-by: Chao Fan <fanc.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2017-03-16 08:55:56 +01:00
Peter Maydell
5b467b9081 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Wed 15 Mar 2017 21:01:53 GMT
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to f233c3f built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-16 07:47:12 +00:00
Markus Armbruster
012b126de2 qapi: Fix a misleading parser error message
When choking on a token where an expression is expected, we report
'Expected "{", "[" or string'.  Close, but no cigar.  Fix it to
Expected '"{", "[", string, boolean or "null"'.

Missed in commit e53188a.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-48-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
c261394978 qapi: Make pylint a bit happier
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-47-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
6bbfb12de6 qapi: Drop unused .check_clash() parameter schema
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-46-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
768562ded0 qapi: union_types is a list used like a dict, make it one
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-45-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
ed285bf821 qapi: struct_types is a list used like a dict, make it one
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-44-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
5f018446fe qapi: enum_types is a list used like a dict, make it one
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-43-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
6f05345f8f qapi: Factor add_name() calls out of the meta conditional
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-42-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
eda43c6844 qapi: Simplify what gets stored in enum_types
Don't invent a new dictionary structure just for enum_types, simply
store the defining expression, like we do for struct_types and
union_types.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-41-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
062e856b15 qapi: Drop unused variable events
Missed in commit e98859a

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-40-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
a9f396b028 qapi: Eliminate check_docs() and drop QAPIDoc.expr
Move what's left in check_docs() to check_expr().  Delegate the actual
checking to new QAPIDoc.check_expr().

QAPIDoc.expr is now unused; drop it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-39-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
816a57cd6e qapi: Fix detection of bogus member documentation
check_definition_doc() checks for member documentation without a
matching member.  It laboriously second-guesses what members
QAPISchema._def_exprs() will create.  That's a stupid game.

Move the check into QAPISchema.check(), where the members are known.
Delegate the actual checking to new QAPIDoc.check().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-38-git-send-email-armbru@redhat.com>
2017-03-16 07:13:04 +01:00
Markus Armbruster
f641d06ad6 tests/qapi-schema: Improve coverage of bogus member docs
New test doc-bad-union-member.json shows we can fail to reject
documentation for nonexistent members.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-37-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
bdc001caaa tests/qapi-schema: Rename doc-bad-args to doc-bad-command-arg
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-36-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
4ea7148e89 qapi: Move empty doc section checking to doc parser
Results in a more precise error location, but the real reason is
emptying out check_docs() step by step.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-35-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
2d433236df qapi: Improve error message on @NAME: in free-form doc
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-34-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
7947016d1c qapi: Move detection of doc / expression name mismatch
Move the check whether the doc matches the expression name from
check_definition_doc() to check_exprs().  This changes the error
location from the comment to the expression.  Makes sense as the
message talks about the expression: "Definition of '%s' follows
documentation for '%s'".  It's also a step towards getting rid of
check_docs().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-33-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
e7823a2adf qapi: Fix detection of doc / expression mismatch
This fixes the errors uncovered by the previous commit.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-32-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
2028be8eea tests/qapi-schema: Improve doc / expression mismatch coverage
New tests doc-before-include.json and doc-before-pragma.json show we
fail to reject a misplaced expression comment.

New test doc-no-symbol.json shows a bad error message.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1489582656-31133-31-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
75b50196d9 qapi2texi: Use category "Object" for all object types
At the protocol level, the distinction between struct, flat union and
simple union is meaningless, they are all JSON objects.  Document them
that way.

Example change (qemu-qmp-ref.txt):

- -- Simple Union: InputEvent
+ -- Object: InputEvent

      Input event union.

This also fixes the completely broken headings for flat and simple
unions in qemu-qmp-ref.7 and qemu-ga-ref.7, by sidestepping a bug in
texi2pod.pl.  For instance, it mistranslates "@deftp {Simple Union}
InputEvent" to "B<Union> (Simple)", but translates "@deftp Object
InputEvent" to "B<SocketAddress> (Object)".

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-30-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
c19eaa64df qapi2texi: Generate descriptions for simple union tags
Simple union tags carry no type information, because their type is
implicit.  Their description should make up for it, but many have
none.  Generate one automatically then.

Example change (qemu-qmp-ref.txt):

  -- Simple Union: ImageInfoSpecific

      A discriminated record of image format specific information
      structures.

      Members:
      'type'
-          Not documented
+          One of "qcow2", "vmdk", "luks"
      'data: ImageInfoSpecificQCow2' when 'type' is "qcow2"
      'data: ImageInfoSpecificVmdk' when 'type' is "vmdk"
      'data: QCryptoBlockInfoLUKS' when 'type' is "luks"

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-29-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
5169cd8767 qapi2texi: Generate documentation for variant members
A flat union's branch brings in the members of another type.  Generate
a suitable reference to that type.

Example change (qemu-qmp-ref.txt):

  -- Flat Union: QCryptoBlockOpenOptions

      The options that are available for all encryption formats when
      opening an existing volume

      Members:
      The members of 'QCryptoBlockOptionsBase'
+     The members of 'QCryptoBlockOptionsQCow' when 'format' is "qcow"
+     The members of 'QCryptoBlockOptionsLUKS' when 'format' is "luks"

      Since: 2.6

A simple union's branch adds a member 'data' of some other type.
Generate documentation for that member.

Example change (qemu-qmp-ref.txt):

  -- Simple Union: SocketAddress

      Captures the address of a socket, which could also be a named file
      descriptor

      Members:
      'type'
	   Not documented
+     'data: InetSocketAddress' when 'type' is "inet"
+     'data: UnixSocketAddress' when 'type' is "unix"
+     'data: VsockSocketAddress' when 'type' is "vsock"
+     'data: String' when 'type' is "fd"

      Since: 1.3

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-28-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
88f63467c5 qapi2texi: Generate reference to base type members
The generated documentation doesn't mention object type members
inherited from a base type.  Fix that.

Example change (qemu-qmp-ref.txt):

  -- Struct: VncServerInfo

      The network connection information for server

      Members:
      'auth' (optional)
	   authentication method used for the plain (non-websocket) VNC
	   server
+     The members of 'VncBasicInfo'

      Since: 2.1

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-27-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
691e03133e qapi2texi: Include member type in generated documentation
The recent merge of docs/qmp-commands.txt and docs/qmp-events.txt into
the schema lost type information.  Fix this documentation regression.

Example change (qemu-qmp-ref.txt):

  -- Struct: InputKeyEvent

      Keyboard input event.

      Members:
-     'button'
+     'button: InputButton'
           Which button this event is for.
-     'down'
+     'down: boolean'
           True for key-down and false for key-up events.

      Since: 2.0

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-26-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
c2dd311cb7 qapi2texi: Implement boxed argument documentation
This replaces manual references like "For the arguments, see the
documentation of ..." by a generated reference "Arguments: the members
of ...".

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-25-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
2c99f5fdc8 qapi2texi: Don't hide undocumented members and arguments
Show undocumented object, alternate type members and command, event
arguments exactly like undocumented enumeration type values.

Example change (qemu-qmp-ref.txt):

  -- Command: query-rocker

      Return rocker switch information.

+     Arguments:
+     'name'
+          Not documented
+
      Returns: 'Rocker' information

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-24-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
5da19f14ff qapi2texi: Explain enum value undocumentedness more clearly
Instead of not saying anything when we have no documentation, say "Not
documented".

Example change (qemu-qmp-ref.txt):

  -- Enum: GuestPanicAction

      An enumeration of the actions taken when guest OS panic is detected

      Values:
      'pause'
           system pauses
      'poweroff'
+          Not documented

      Since: 2.1 (poweroff since 2.8)

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-23-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
2a1183ce93 qapi2texi: Present the table of members more clearly
The table of members follows the main descriptive text immediately.
Makes it hard to see what it is about.  Start a new paragraph, and
lead with a line "Members:" for object and alternate types, "Values:"
for enumeration types, and "Arguments:" for commands and events.

Example change (qemu-qmp-ref.txt):

  -- Command: set_link

      Sets the link status of a virtual network adapter.
+
+     Arguments:
      'name'
           the device name of the virtual network adapter
      'up'
           true to set the link status to be up

      Returns: Nothing on success If 'name' is not a valid network
      device, DeviceNotFound

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-22-git-send-email-armbru@redhat.com>
2017-03-16 07:13:03 +01:00
Markus Armbruster
71d918a1b1 qapi2texi: Plainer enum value and member name formatting
Use @code{%s} instead of @code{'%s'}.  Impact, using @id as example:

* Texinfo
  -@item @code{'id'}
  +@item @code{id}

* HTML
  -<dt><code>'id'</code></dt>
  +<dt><code>id</code></dt>

* POD (for manual pages):
  -=item C<'id'>
  +=item C<id>

* Formatted manual pages:
  -'id'
  +"id"

* Plain text:
  -     ''id''
  +     'id'

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-21-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
ef801a9bb1 qapi: Prefer single-quoted strings more consistently
PEP 8 advises:

    In Python, single-quoted strings and double-quoted strings are the
    same.  This PEP does not make a recommendation for this.  Pick a
    rule and stick to it.  When a string contains single or double
    quote characters, however, use the other one to avoid backslashes
    in the string.  It improves readability.

The QAPI generators succeed at picking a rule, but fail at sticking to
it.  Convert a bunch of double-quoted strings to single-quoted ones.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-20-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
0fe675af77 qapi: Use raw strings for regular expressions consistently
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-19-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
1d8bda128d qapi: The #optional tag is redundant, drop
We traditionally mark optional members #optional in the doc comment.
Before commit 3313b61, this was entirely manual.

Commit 3313b61 added some automation because its qapi2texi.py relied
on #optional to determine whether a member is optional.  This is no
longer the case since the previous commit: the only thing qapi2texi.py
still does with #optional is stripping it out.  We still reject bogus
qapi-schema.json and six places for qga/qapi-schema.json.

Thus, you can't actually rely on #optional to see whether something is
optional.  Yet we still make people add it manually.  That's just
busy-work.

Drop the code to check, fix up and strip out #optional, along with all
instances of #optional.  To keep it out, add code to reject it, to be
dropped again once the dust settles.

No change to generated documentation.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-18-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
aa964b7fdc qapi2texi: Convert to QAPISchemaVisitor
qapi2texi works with schema expression trees.  Such a tight coupling
to schema language syntax is not a good idea.  Convert it to the visitor
interface the other generators use.

No change to generated documentation.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-17-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
860e877861 qapi: Conjure up QAPIDoc.ArgSection for undocumented members
qapi2texi.py already conjures up ArgSections for undocumented
enumeration values, in texi_enum.  Drop that, and conjure them up for
all kinds of "arguments" (enumeration values, object and alternate
type members) in qapi.py instead.

Take care to keep generated documentation exactly the same for now.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-16-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
069fb5b250 qapi: Prepare for requiring more complete documentation
We currently neglect to check all enumeration values, common members
of object types and members of alternate types are documented.
Unsurprisingly, many aren't.

Add the necessary plumbing to find undocumented ones, except for
variant members of object types.  Don't enforce anything just yet, but
connect each QAPIDoc.ArgSection to its QAPISchemaMember.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-15-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
4636211e4d qapi: Fix QAPISchemaEnumType.is_implicit() for 'QType'
Missed in commit 7264f5c.  Harmless, because nothing checks whether an
enumeration type is implicit so far.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-14-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
8c0aa61318 qapi/rocker: Fix up doc comment notes on optional members
Talking about #optional like this

    # Note: fields are marked #optional to indicate that they may or may
    # not appear ...

doesn't work so well in generated documentation, because the #optional
tag is not visible there.  Replace by

    # Note: optional members may or may not appear ...

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1489582656-31133-13-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
b116fd8e30 qapi: Avoid unwanted blank lines in QAPIDoc
We silently fix missing #optional tags for QAPIDoc by appending a line
"#optional" to the section's .content.  However, this interferes with
.__repr__ stripping trailing blank lines from .content.

Use new ArgSection instance variable .optional instead, and leave
.content alone.

To permit testing .optional in texi_body(), clean up texi_enum()'s
hack to add empty documentation for undocumented enum values: add an
ArgSection instead of ''.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1489582656-31133-12-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
42bebcc129 qapi2texi: Fix up output around #optional
We use tag #optional to mark optional members, like this:

    # @name: #optional The name of the guest

texi_body() strips #optional, but not whitespace around it.  For the
above, we get in qemu-qmp-qapi.texi

    @item @code{'name'} (optional)
     The name of the guest
    @end table

The extra space can lead to artifacts in output, e.g in
qemu-qmp-ref.7.pod

    =item C<'name'> (optional)

     The name of the guest

and then in qemu-qmp-ref.7

    .IX Item "name (optional)"
    .Vb 1
    \& The name of the guest
    .Ve

instead of intended plain

    .IX Item "name (optional)"
    The name of the guest

Get rid of these artifacts by removing whitespace around #optional
along with it.

This turns three minus signs in qapi-schema.json into markup, because
they're now at the beginning of the line.  Drop them, they're unwanted
there.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1489582656-31133-11-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
4815374513 qapi: Fix to reject empty union base gracefully
Common Python pitfall: 'assert base_members' fires on [] in addition
to None.  Correct to 'assert base_members is not None'.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-10-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
707fb2d381 tests/qapi-schema: Cover empty union base
The new test case shows off qapi.py choking on an empty union base.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-9-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
bd7f974796 qapi: Clean up build of generated documentation
Rename intermediate qemu-qapi.texi to qemu-qmp-qapi.texi to match its
user qemu-qmp-ref.texi, just like qemu-ga-qapi.texi matches
qemu-ga-ref.texi.

Build the intermediate .texi next to the sources and the final output
in docs/ instead of dumping them into the build root.

Fix version.texi dependencies so that only the targets that actually
need it depend on it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-8-git-send-email-armbru@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
2cfbae3c42 qapi: Have each QAPI schema declare its name rule violations
qapi.py has a hardcoded white-list of type names that may violate the
rule on use of upper and lower case.  Add a new pragma directive
'name-case-whitelist', and use it to replace the hard-coded
white-list.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1489582656-31133-7-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
1554a8fae9 qapi: Have each QAPI schema declare its returns white-list
qapi.py has a hardcoded white-list of command names that may violate
the rules on permitted return types.  Add a new pragma directive
'returns-whitelist', and use it to replace the hard-coded white-list.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1489582656-31133-6-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-16 07:13:02 +01:00
Markus Armbruster
700dc9f503 docs/qapi-code-gen.txt: Drop confusing reference to 'gen'
Section "Commands" qualifies its rules on permitted argument and
return types "with one exception noted below when 'gen' is used".  The
note went away in commit 2d21291.  Clean up the dangling references.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1489582656-31133-5-git-send-email-armbru@redhat.com>
2017-03-16 07:13:01 +01:00
Markus Armbruster
87c16dceca qapi: Back out doc comments added just to please qapi.py
This reverts commit 3313b61's changes to tests/qapi-schema/, except
for tests/qapi-schema/doc-*.

We could keep some of these doc comments to serve as positive test
cases.  However, they don't actually add to what we get from doc
comment use in actual schemas, as we we don't test output matches
expectations, and don't systematically cover doc comment features.
Proper positive test coverage would be nice.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1489582656-31133-4-git-send-email-armbru@redhat.com>
2017-03-16 07:13:01 +01:00
Markus Armbruster
bc52d03ff5 qapi: Make doc comments optional where we don't need them
Since we added the documentation generator in commit 3313b61, doc
comments are mandatory.  That's a very good idea for a schema that
needs to be documented, but has proven to be annoying for testing.

Make doc comments optional again, but add a new directive

    { 'pragma': { 'doc-required': true } }

to let a QAPI schema require them.

Add test cases for the new pragma directive.  While there, plug a
minor hole in includ directive test coverage.

Require documentation in the schemas we actually want documented:
qapi-schema.json and qga/qapi-schema.json.

We could probably make qapi2texi.py cope with incomplete
documentation, but for now, simply make it refuse to run unless the
schema has 'doc-required': true.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1489582656-31133-3-git-send-email-armbru@redhat.com>
[qapi-code-gen.txt wording tweaked]
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-16 07:13:01 +01:00
Markus Armbruster
e04dea8872 qapi: Factor QAPISchemaParser._include() out of .__init__()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1489582656-31133-2-git-send-email-armbru@redhat.com>
2017-03-16 07:13:01 +01:00
Daniel P. Berrange
f880cd6b6f qmp: allow setting properties to empty string in qmp-shell
The qmp-shell property parser currently rejects attempts to
set string properties to the empty string eg

  (QEMU) migrate-set-parameters  tls-hostname=
  Error while parsing command line: Expected a key=value pair, got 'tls-hostname='
command format: <command-name>  [arg-name1=arg1] ... [arg-nameN=argN]

This is caused by checking the wrong condition after splitting
the parameter on '='. The "partition" method will return "" for
the separator field, if the seperator was not present, so that
is the correct thing to check for malformed syntax.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <20170302122429.7737-1-berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-03-16 07:13:01 +01:00
Marc-André Lureau
597494abde qapi2texi: change texi formatters
STRUCT_FMT is generic enough, rename it to TYPE_FMT, use it for unions.

Rename COMMAND_FMT to MSG_FMT, since it applies to both commands and
events.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170125130308.16104-2-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-03-16 07:12:54 +01:00
Daniel P. Berrange
8755b4afbd trace: ensure $(tracetool-y) is defined in top level makefile
The build rules for trace files have a dependancy on $(tracetool-y).
This variable populated in the trace/Makefile.objs file and thus its
definition gets pulled into the top level makefile. This happens too
late in the process though, so by the time $(tracetool-y) is defined,
make has already evaluated $(tracetool-y) in the dependancies and
found it to be empty. The result is that when the tracetool source
is changed, the generated files are not rebuilt. The solution is to
define the variable in the top level makefile too

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Message-id: 20170315123421.28815-1-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-03-16 11:51:26 +08:00
Daniel P. Berrange
4175304e59 makefile: generate trace-events-all upfront
Files should not be created in the build dir during the
'make install' phase. List 'trace-events-all' as a
generated file so that it gets created upfront during
build.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170228122901.24520-3-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-03-16 11:51:15 +08:00
Daniel P. Berrange
4f04f13c2a makefile: merge GENERATED_HEADERS & GENERATED_SOURCES variables
The only functional difference between the GENERATED_HEADERS
and GENERATED_SOURCES variables is that 'Makefile' has a
dependancy on GENERATED_HEADERS, causing generated header files
to be created immediatey at the start of the build process.
There is no reason why this early creation should be restricted
to the .h files, and not include .c files too. Merge both of
the variables into a single GENERATED_FILES variable to make
it clear it is for any type of generated file.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170228122901.24520-2-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-03-16 11:51:15 +08:00
Li Qiang
d68f0f778e ide: ahci: call cleanup function in ahci unit
This can avoid memory leak when hotunplug the ahci device.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 1488449293-80280-4-git-send-email-liqiang6-s@360.cn
Signed-off-by: John Snow <jsnow@redhat.com>
2017-03-15 20:50:14 -04:00
Li Qiang
c9f086418a ide: core: add cleanup function
As the pci ahci can be hotplug and unplug, in the ahci unrealize
function it should free all the resource once allocated in the
realized function. This patch add ide_exit to free the resource.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 1488449293-80280-3-git-send-email-liqiang6-s@360.cn
Signed-off-by: John Snow <jsnow@redhat.com>
2017-03-15 20:50:14 -04:00
Li Qiang
44a109c1b3 ide: qdev: register ide bus unrealize function
we have an idebus unrealize function, but it was being
registered as the unrealize function for the IDE Device,
so it was not getting invoked on device teardown because
nothing is "unrealizing" the IDE devices themselves.

Suggested-by: John Snow <jsnow@redhat.com>
Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1488449293-80280-2-git-send-email-liqiang6-s@360.cn
Signed-off-by: John Snow <jsnow@redhat.com>
2017-03-15 20:50:14 -04:00
zhanghailiang
bdf4c4ec53 virtio-serial-bus: Delete timer from list before free it
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Amit Shah <amit@kernel.org>
2017-03-16 01:46:42 +02:00
Marcel Apfelbaum
27ce0f3afc hw/virtio: fix Power Management Control Register for PCI Express virtio devices
Make Power Management State flag writable to conform
with the PCI Express spec.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-16 01:46:41 +02:00
Marcel Apfelbaum
d584f1b9ca hw/virtio: fix Link Control Register for PCI Express virtio devices
Make several Link Control Register flags writable to conform
with the PCI Express spec.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-16 01:46:41 +02:00
Marcel Apfelbaum
c2cabb3422 hw/virtio: fix error enabling flags in Device Control register
When the virtio devices are PCI Express, make error-enabling flags
writable to respect the PCIe spec.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-16 01:46:40 +02:00
Marcel Apfelbaum
f03d8ea330 hw/pcie: fix Extended Configuration Space for devices with no Extended Capabilities
Absence of any Extended Capabilities is required to be
indicated by an Extended Capability header with a Capability ID of
0000h, a Capability Version of 0h, and a Next Capability Offset of 000h.

Instead of inserting a 'NULL' capability is simpler to mark the start
of the Extended Configuration Space as read-only to achieve the same
behaviour.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-16 01:46:40 +02:00
Mark Cave-Ayland
111308e789 Update OpenBIOS images to f233c3f built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-03-15 19:42:08 +00:00
Peter Maydell
1883ff34b5 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pc: fixes

Some fixes to fallback from using virtio caching,
pls a minor vm gen id fix.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 15 Mar 2017 17:59:25 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  virtio-pci: reset modern vq meta data
  Revert "virtio: unbreak virtio-pci with IOMMU after caching ring translations"
  pci: introduce a bus master container
  virtio: validate address space cache during init
  virtio: destroy region cache during reset
  virtio: guard against NULL pfn
  Bugfix: Handle error if VM Generation ID device not present

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-15 18:44:05 +00:00
Jason Wang
60a8d80234 virtio-pci: reset modern vq meta data
We don't reset proxy->vqs[].{num|desc[]|avail[]|used[]}. This means if
a driver enable the vq without setting vq address after reset. The old
addresses were leaked. Fixing this by resetting modern vq meta data
during device reset.

Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-15 19:59:18 +02:00
Jason Wang
f0edf23978 Revert "virtio: unbreak virtio-pci with IOMMU after caching ring translations"
This reverts commit
96a8821d21. Previous patch is a better
solution which does not require a strict order between virtio and IOMMU.

CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-15 19:59:00 +02:00
Peter Maydell
de71fb96bf Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2017-03-15' into staging
Miscellaneous patches for 2017-03-15

# gpg: Signature made Wed 15 Mar 2017 13:12:35 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-misc-2017-03-15:
  coverity-model: model address_space_read/write
  tests: Use error_free_or_abort() where appropriate

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-15 17:54:41 +00:00
Jason Wang
3716d5902d pci: introduce a bus master container
96a8821d21 ("virtio: unbreak virtio-pci with IOMMU after caching ring
translations") tries to make IOMMU works with virtio memory region
cache, but it requires IOMMU to be created before any virtio
devices. This is sub optimal, fixing this by introduce a bus master
container to make sure address space can be initialized during device
registering, and then we can safely set alias and make
bus_master_enable_region as its subregion during bus master
initialization.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-15 19:37:19 +02:00
Jason Wang
e45da65322 virtio: validate address space cache during init
We don't check the return value of address_space_cache_init(), this
may lead buggy driver use incorrect region caches. Instead of
triggering an assert, catch and warn this early in
virtio_init_region_cache().

Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-15 19:37:19 +02:00
Jason Wang
e0e2d64409 virtio: destroy region cache during reset
We don't destroy region cache during reset which can make the maps
of previous driver leaked to a buggy or malicious driver that don't
set vring address before starting to use the device. Fix this by
destroy the region cache during reset and validate it before trying to
see them.

Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-15 19:37:19 +02:00
Jason Wang
168e4af3c1 virtio: guard against NULL pfn
To avoid access stale memory region cache after reset, this patch
check the existence of virtqueue pfn for all exported virtqueue access
helpers before trying to use them.

Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-15 19:37:19 +02:00
Ben Warren
72d9196f1e Bugfix: Handle error if VM Generation ID device not present
This was crashing due to NULL-pointer dereference

QMP Test case:
==============

(QEMU) query-vm-generation-id
{"error": {"class": "GenericError", "desc": "VM Generation ID device not
found"}}

HMP Test case:
==============
virsh # qemu-monitor-command --hmp 3 info vm-generation-id
VM Generation ID device not found

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-15 19:37:19 +02:00
Peter Maydell
7584bf5e6f Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Wed 15 Mar 2017 05:05:04 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  os: don't corrupt pre-existing memory-backend data with prealloc

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-15 14:19:59 +00:00
Peter Maydell
926e368388 Merge remote-tracking branch 'remotes/ehabkost/tags/machine-pull-request' into staging
Fix global property and -cpu handling bug

This bug fix was supposed to be applied just after 2.8.0 was
released, but it slipped through the cracks. Sending it now for
the next -rc.

# gpg: Signature made Tue 14 Mar 2017 20:04:50 GMT
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/machine-pull-request:
  machine: Convert abstract typename on compat_props to subclass names

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-15 13:07:08 +00:00
Paolo Bonzini
4d0e72396b coverity-model: model address_space_read/write
Commit eb7eeb8 ("memory: split address_space_read and
address_space_write", 2015-12-17) made address_space_rw
dispatch to one of address_space_read or address_space_write,
rather than vice versa.

For callers of address_space_read and address_space_write this
causes false positive defects when Coverity sees a length-8 write in
address_space_read and a length-4 (e.g. int*) buffer to read into.
As long as the size of the buffer is okay, this is a false positive.

Reflect the code change into the model.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170315081641.20588-1-pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-03-15 13:59:16 +01:00
Markus Armbruster
157db293eb tests: Use error_free_or_abort() where appropriate
Done with this Coccinelle semantic patch:

    @@
    expression E;
    @@
    -    g_assert(E);
    -    error_free(E);
    +    error_free_or_abort(&E);

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487362554-5688-1-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-15 08:52:09 +01:00
Daniel P. Berrange
9dc44aa582 os: don't corrupt pre-existing memory-backend data with prealloc
When using a memory-backend object with prealloc turned on, QEMU
will memset() the first byte in every memory page to zero. While
this might have been acceptable for memory backends associated
with RAM, this corrupts application data for NVDIMMs.

Instead of setting every page to zero, read the current byte
value and then just write that same value back, so we are not
corrupting the original data. Directly write the value instead
of memset()ing it, since there's no benefit to memset for a
single byte write.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Message-id: 20170303113255.28262-1-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-03-15 11:55:41 +08:00
Eduardo Habkost
0bcba41fe3 machine: Convert abstract typename on compat_props to subclass names
Original problem description by Greg Kurz:

> Since commit "9a4c0e220d8a hw/virtio-pci: fix virtio
> behaviour", passing -device virtio-blk-pci.disable-modern=off
> has no effect on 2.6 machine types because the internal
> virtio-pci.disable-modern=on compat property always prevail.

The same bug also affects other abstract type names mentioned on
compat_props by machine-types: apic-common, i386-cpu, pci-device,
powerpc64-cpu, s390-skeys, spapr-pci-host-bridge, usb-device,
virtio-pci, x86_64-cpu.

The right fix for this problem is to make sure compat_props and
-global options are always applied in the order they are
registered, instead of reordering them based on the type
hierarchy. But changing the ordering rules of -global is risky
and might break existing configurations, so we shouldn't do that
on a stable branch.

This is a temporary hack that will work around the bug when
registering compat_props properties: if we find an abstract class
on compat_props, register properties for all its non-abstract
subtypes instead. This will make sure -global won't be overridden
by compat_props, while keeping the existing ordering rules on
-global options.

Note that there's one case that won't be fixed by this hack:
"-global spapr-pci-vfio-host-bridge.<option>=<value>" won't be
able to override compat_props, because spapr-pci-host-bridge is
not an abstract class.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <1481575745-26120-1-git-send-email-ehabkost@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-03-14 16:53:44 -03:00
Peter Maydell
d84f714eaf Update version for v2.9.0-rc0 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 19:18:23 +00:00
Peter Maydell
64c358a33a Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* "x" monitor command fix for KVM (Christian)
* MemoryRegion name documentation (David)
* mem-prealloc optimization (Jitendra)
* -icount/MTTCG fixes (me)
* "info mtree" niceness (Peter)
* NBD drop_sync buffer overflow (Vladimir/Eric)
* small cleanups and bugfixes (Li, Lin, Suramya, Thomas)
* fix for "-device kvmclock" w/TCG (Eduardo)
* debug output before crashing on KVM_{GET,SET}_MSRS (Eduardo)

# gpg: Signature made Tue 14 Mar 2017 13:42:05 GMT
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  nbd/client: fix drop_sync [CVE-2017-2630]
  memory: info mtree check mr range overflow
  icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread
  main-loop: remove now unnecessary optimization
  cpus: define QEMUTimerListNotifyCB for QEMU system emulation
  qemu-timer: do not include sysemu/cpus.h from util/qemu-timer.h
  qemu-timer: fix off-by-one
  target/nios2: take BQL around interrupt check
  scsi: mptsas: fix the wrong reading size in fetch request
  util: Removed unneeded header from path.c
  configure: add the missing help output for optional features
  scripts/dump-guest-memory.py: fix int128_get64 on recent gcc
  kvmclock: Don't crash QEMU if KVM is disabled
  kvm: Print MSR information if KVM_{GET,SET}_MSRS failed
  exec: add cpu_synchronize_state to cpu_memory_rw_debug
  mem-prealloc: reduce large guest start-up and migration time.
  docs: Add a note about mixing bootindex with "-boot order"
  memory_region: Fix name comments

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 16:52:17 +00:00
Peter Maydell
5e2fb7c598 hw/misc/imx6_src: Don't crash trying to reset missing CPUs
Commit 4881658a4b introduced a call to arm_get_cpu_by_id(),
and Coverity noticed that we weren't checking that it didn't
return NULL (CID 1371652).

Normally this won't happen (because all 4 CPUs are expected
to exist), but it's possible the user requested fewer CPUs
on the command line. Handle this possibility by silently
doing nothing, which is the same behaviour as before commit
4881658a4b and also how we handle the other CPU operations
(since we ignore the INVALID_PARAM returns from arm_set_cpu_on()
and friends).

There is a slight behavioural difference to the pre-4881658a4b
situation: the "reset this core" bit will remain set rather
than not being permitted to be set. The imx6 datasheet is
unclear about the behaviour in this odd corner case, so we
opt for the simpler code rather than complicated logic to
maintain identical behaviour.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488542374-1256-1-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-03-14 16:13:22 +00:00
Programmingkid
5f26fcfb98 ui/cocoa.m: add toast file support
Add the ability for the user to use .toast files with QEMU. This format works
just like ISO files.

Signed-off-by: John Arbuckle <programmingkidx@gmail.com>
Message-id: 0C9DA454-E3DC-4291-806E-9A96557DE833@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 15:09:56 +00:00
Peter Maydell
486fc7a837 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170314' into staging
target-arm queue:
 * arm-powerctl: Fix psci info return values
 * implement armv8 PMUSERENR (user-mode enable bits)

# gpg: Signature made Tue 14 Mar 2017 11:31:11 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170314:
  target/arm/arm-powerctl: Fix psci info return values
  target/arm: implement armv8 PMUSERENR (user-mode enable bits)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 14:11:38 +00:00
Vladimir Sementsov-Ogievskiy
2563c9c6b8 nbd/client: fix drop_sync [CVE-2017-2630]
Comparison symbol is misused. It may lead to memory corruption.
Introduced in commit 7d3123e.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170203154757.36140-6-vsementsov@virtuozzo.com>
[eblake: add CVE details, update conditional]
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170307151627.27212-1-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 14:41:19 +01:00
Peter Xu
b31f841262 memory: info mtree check mr range overflow
The address of memory regions might overflow when something wrong
happened, like reported in:

https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html

For easier debugging, let's try to detect it.

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1489496187-624-1-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:57:52 +01:00
Paolo Bonzini
6b8f0187a4 icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread
icount has become much slower after tcg_cpu_exec has stopped
using the BQL.  There is also a latent bug that is masked by
the slowness.

The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL
timer now has to wake up the I/O thread and wait for it.  The rendez-vous
is mediated by the BQL QemuMutex:

- handle_icount_deadline wakes up the I/O thread with BQL taken
- the I/O thread wakes up and waits on the BQL
- the VCPU thread releases the BQL a little later
- the I/O thread raises an interrupt, which calls qemu_cpu_kick
- the VCPU thread notices the interrupt, takes the BQL to
  process it and waits on it

All this back and forth is extremely expensive, causing a 6 to 8-fold
slowdown when icount is turned on.

One may think that the issue is that the VCPU thread is too dependent
on the BQL, but then the latent bug comes in.  I first tried removing
the BQL completely from the x86 cpu_exec, only to see everything break.
The only way to fix it (and make everything slow again) was to add a dummy
BQL lock/unlock pair.

This is because in -icount mode you really have to process the events
before the CPU restarts executing the next instruction.  Therefore, this
series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in
the vCPU thread when running in icount mode.

The required changes include:

- make the timer notification callback wake up TCG's single vCPU thread
  when run from another thread.  By using async_run_on_cpu, the callback
  can override all_cpu_threads_idle() when the CPU is halted.

- move handle_icount_deadline after qemu_tcg_wait_io_event, so that
  the timer notification callback is invoked after the dummy work item
  wakes up the vCPU thread

- make handle_icount_deadline run the timers instead of just waking the
  I/O thread.

- stop processing the timers in the main loop

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:51:34 +01:00
Paolo Bonzini
e330c118f2 main-loop: remove now unnecessary optimization
This optimization is not necessary anymore, because the vCPU now drops
the I/O thread lock even with TCG.  Drop it to simplify the code and
avoid the "I/O thread spun for 1000 iterations" warning.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:29:21 +01:00
Paolo Bonzini
3f53bc61a4 cpus: define QEMUTimerListNotifyCB for QEMU system emulation
There is no change for now, because the callback just invokes
qemu_notify_event.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:28:29 +01:00
Paolo Bonzini
d2528bdc19 qemu-timer: do not include sysemu/cpus.h from util/qemu-timer.h
This dependency is the wrong way, and we will need util/qemu-timer.h from
sysemu/cpus.h in the next patch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:28:18 +01:00
Paolo Bonzini
33bef0b994 qemu-timer: fix off-by-one
If the first timer is exactly at the current value of the clock, the
deadline is met and the timer should fire.  This fixes itself on the next
iteration of the loop without icount; with icount, however, execution
of instructions will stop exactly at the deadline and won't proceed.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:42 +01:00
Paolo Bonzini
c0d24e7f70 target/nios2: take BQL around interrupt check
The interrupt controller does not have its own locking.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:37 +01:00
Li Qiang
b01a2d07c9 scsi: mptsas: fix the wrong reading size in fetch request
When fetching request, it should read sizeof(*hdr), not the
pointer hdr.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-Id: <1489488980-130668-1-git-send-email-liqiang6-s@360.cn>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:37 +01:00
Suramya Shah
bd5d983fa8 util: Removed unneeded header from path.c
Signed-off-by: Suramya Shah <shah.suramya@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170310163948.7567-1-shah.suramya@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:37 +01:00
c12d66aac1 configure: add the missing help output for optional features
Signed-off-by: Lin Ma <lma@suse.com>
Message-Id: <20170310101405.26974-1-lma@suse.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Marc-André Lureau
9b4b157ef6 scripts/dump-guest-memory.py: fix int128_get64 on recent gcc
The Int128 is no longer a struct, reaching a python exception:
Python Exception <class 'gdb.error'> Attempt to extract a component of a value that is not a (null).:

Replace struct access with a cast to uint64[] instead.

Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=1427466

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170310112819.16760-1-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Eduardo Habkost
ca2edcd35c kvmclock: Don't crash QEMU if KVM is disabled
Most machines don't allow sysbus devices like "kvmclock" to be
created from the command-line, but some of them do (the ones with
has_dynamic_sysbus=true). In those cases, it's possible to
manually create a kvmclock device without KVM being enabled,
making QEMU crash:

  $ qemu-system-x86_64 -machine q35,accel=tcg -device kvmclock
  Segmentation fault (core dumped)

This changes kvmclock's realize method to return an error if KVM
is disabled, to ensure it won't crash QEMU.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170309185046.17555-1-ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Eduardo Habkost
c70b11d160 kvm: Print MSR information if KVM_{GET,SET}_MSRS failed
When a KVM_{GET,SET}_MSRS ioctl() fails, it is difficult to find
out which MSR caused the problem. Print an error message for
debugging, before we trigger the (ret == cpu->kvm_msr_buf->nmsrs)
assert.

Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170309194634.28457-1-ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Christian Borntraeger
79ca7a1b89 exec: add cpu_synchronize_state to cpu_memory_rw_debug
I sometimes got "Cannot access memory" when using the x command
on the monitor. Turns out that the cpu env did contain stale data
(e.g. wrong control register content for page table origin).
We must synchronize the state of the CPU before walking the page
tables. A similar issues happens for a remote gdb, so lets
do the cpu_synchronize_state in cpu_memory_rw_debug.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1488896348-13560-1-git-send-email-borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Jitendra Kolhe
1e356fc14b mem-prealloc: reduce large guest start-up and migration time.
Using "-mem-prealloc" option for a large guest leads to higher guest
start-up and migration time. This is because with "-mem-prealloc" option
qemu tries to map every guest page (create address translations), and
make sure the pages are available during runtime. virsh/libvirt by
default, seems to use "-mem-prealloc" option in case the guest is
configured to use huge pages. The patch tries to map all guest pages
simultaneously by spawning multiple threads. Currently limiting the
change to QEMU library functions on POSIX compliant host only, as we are
not sure if the problem exists on win32. Below are some stats with
"-mem-prealloc" option for guest configured to use huge pages.

------------------------------------------------------------------------
Idle Guest      | Start-up time | Migration time
------------------------------------------------------------------------
Guest stats with 2M HugePage usage - single threaded (existing code)
------------------------------------------------------------------------
64 Core - 4TB   | 54m11.796s    | 75m43.843s
64 Core - 1TB   | 8m56.576s     | 14m29.049s
64 Core - 256GB | 2m11.245s     | 3m26.598s
------------------------------------------------------------------------
Guest stats with 2M HugePage usage - map guest pages using 8 threads
------------------------------------------------------------------------
64 Core - 4TB   | 5m1.027s      | 34m10.565s
64 Core - 1TB   | 1m10.366s     | 8m28.188s
64 Core - 256GB | 0m19.040s     | 2m10.148s
-----------------------------------------------------------------------
Guest stats with 2M HugePage usage - map guest pages using 16 threads
-----------------------------------------------------------------------
64 Core - 4TB   | 1m58.970s     | 31m43.400s
64 Core - 1TB   | 0m39.885s     | 7m55.289s
64 Core - 256GB | 0m11.960s     | 2m0.135s
-----------------------------------------------------------------------

Changed in v2:
 - modify number of memset threads spawned to min(smp_cpus, 16).
 - removed 64GB memory restriction for spawning memset threads.

Changed in v3:
 - limit number of threads spawned based on
   min(sysconf(_SC_NPROCESSORS_ONLN), 16, smp_cpus)
 - implement memset thread specific siglongjmp in SIGBUS signal_handler.

Changed in v4
 - remove sigsetjmp/siglongjmp and SIGBUS unblock/block for main thread
   as main thread no longer touches any pages.
 - simplify code my returning memset_thread_failed status from
   touch_all_pages.

Signed-off-by: Jitendra Kolhe <jitendra.kolhe@hpe.com>
Message-Id: <1487907103-32350-1-git-send-email-jitendra.kolhe@hpe.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Thomas Huth
c0d9f7d0bc docs: Add a note about mixing bootindex with "-boot order"
Occasionally the users try to mix the bootindex properties with the
"-boot order" parameter - and this likely does not give the expected
results. So let's add a proper statement that these two concepts
should not be used together.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1488303601-23741-1-git-send-email-thuth@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Dr. David Alan Gilbert
e8f5fe2de1 memory_region: Fix name comments
The 'name' parameter to memory_region_init_* had been marked as debug
only, however vmstate_region_ram uses it as a parameter to
qemu_ram_set_idstr to set RAMBlock names and these form part of the
migration stream.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170309152708.30635-1-dgilbert@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-14 13:26:36 +01:00
Andrew Jones
d5affb0d86 target/arm/arm-powerctl: Fix psci info return values
The power state spec section 5.1.5 AFFINITY_INFO defines the
affinity info return values as

  0 ON
  1 OFF
  2 ON_PENDING

I grepped QEMU for power_state to ensure that no assumptions
of OFF=0 were being made.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-id: 20170303123232.4967-1-drjones@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 11:28:54 +00:00
Andrew Baumann
6ecd0b6ba0 target/arm: implement armv8 PMUSERENR (user-mode enable bits)
In armv8, this register implements more than a single bit, with
fine-grained enables for read access to event counters, cycles
counters, and write access to the software increment. This change
implements those checks using custom access functions for the relevant
registers.

Signed-off-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
Message-id: 20170228215801.10472-2-Andrew.Baumann@microsoft.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: move a couple of access functions to be only compiled
 ifndef CONFIG_USER_ONLY to avoid compiler warnings]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 11:28:54 +00:00
Peter Maydell
591bce29b1 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Tue 14 Mar 2017 07:55:01 GMT
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  hw/net: implement MIB counters in mcf_fec driver
  COLO-compare: Fix trace_event print bug
  e1000e: correctly tear down MSI-X memory regions

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 11:15:00 +00:00
Peter Maydell
94b5d57d2f Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170314' into staging
ppc patch queue for 2017-03-14

This set has a handful og bugfixes to go into qemu-2.9.  This includes
an update to the dtc/libfdt submodule which will fix the build errors
seen on some distributions.

# gpg: Signature made Tue 14 Mar 2017 04:00:41 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170314:
  dtc: Update submodule to avoid build errors
  pseries: Don't expose PCIe extended config space on older machine types
  target/ppc: fix cpu_ov setting for 32-bit
  target/ppc: Fix wrong number of UAMR register

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 10:13:19 +00:00
Christopher Covington
4d04351f4c build: include sys/sysmacros.h for major() and minor()
The definition of the major() and minor() macros are moving within glibc to
<sys/sysmacros.h>. Include this header when it is available to avoid the
following sorts of build-stopping messages:

qga/commands-posix.c: In function ‘dev_major_minor’:
qga/commands-posix.c:656:13: error: In the GNU C Library, "major" is defined
 by <sys/sysmacros.h>. For historical compatibility, it is
 currently defined by <sys/types.h> as well, but we plan to
 remove this soon. To use "major", include <sys/sysmacros.h>
 directly. If you did not intend to use a system-defined macro
 "major", you should undefine it after including <sys/types.h>. [-Werror]
         *devmajor = major(st.st_rdev);
             ^~~~~~~~~~~~~~~~~~~~~~~~~~

qga/commands-posix.c:657:13: error: In the GNU C Library, "minor" is defined
 by <sys/sysmacros.h>. For historical compatibility, it is
 currently defined by <sys/types.h> as well, but we plan to
 remove this soon. To use "minor", include <sys/sysmacros.h>
 directly. If you did not intend to use a system-defined macro
 "minor", you should undefine it after including <sys/types.h>. [-Werror]
         *devminor = minor(st.st_rdev);
             ^~~~~~~~~~~~~~~~~~~~~~~~~~

The additional include allows the build to complete on Fedora 26 (Rawhide)
with glibc version 2.24.90.

Signed-off-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-14 10:08:22 +00:00
Greg Ungerer
adb560f7fc hw/net: implement MIB counters in mcf_fec driver
The FEC ethernet hardware module used on ColdFire SoC parts contains a
block of RAM used to maintain hardware counters. This block is accessible
via the usual FEC register address space. There is currently no support
for this in the QEMU mcf_fec driver.

Add support for storing a MIB RAM block, and provide register level
access to it. Also implement a basic set of stats collection functions
to populate MIB data fields.

This support tested running a Linux target and using the net-tools
"ethtool -S" option. As of linux-4.9 the kernels FEC driver makes
accesses to the MIB counters during its initialization (which it never
did before), and so this version of Linux will now fail with the QEMU
error:

    qemu: hardware error: mcf_fec_read: Bad address 0x200

This MIB counter support fixes this problem.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-14 15:39:55 +08:00
Zhang Chen
e630b2bf7c COLO-compare: Fix trace_event print bug
Because of inet_ntoa() return a statically allocated buffer,
subsequent calls will overwrite, So we fix this bug.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-14 15:39:55 +08:00
Paolo Bonzini
7ec7ae4b97 e1000e: correctly tear down MSI-X memory regions
MSI-X has been disabled by the time the e1000e device is unrealized, hence
msix_uninit is never called.  This causes the object to be leaked, which
shows up as a RAMBlock with empty name when attempting migration.

Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-14 15:39:55 +08:00
David Gibson
28df75d8d1 dtc: Update submodule to avoid build errors
The currently included version of the dtc/libfdt submodule has some build
errors on certain distributions (including RHEL7).  This is due to some
poorly named macros in libfdt.h; they're designed for use with the sparse
static checker, but use reserved names which conflict with some symbols in
the standard headers.

That's been corrected in upstream dtc, this updates the qemu submodule to
bring the fix to qemu.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-14 12:24:29 +11:00
David Gibson
82516263ce pseries: Don't expose PCIe extended config space on older machine types
bb9986452 "spapr_pci: Advertise access to PCIe extended config space"
allowed guests to access the extended config space of PCI Express devices
via the PAPR interfaces, even though the paravirtualized bus mostly acts
like plain PCI.

However, that patch enabled access unconditionally, including for existing
machine types, which is an unwise change in behaviour.  This patch limits
the change to pseries-2.9 (and later) machine types.

Suggested-by: Andrea Bolognani <abologna@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-14 11:54:17 +11:00
Nikunj A Dadhania
38a61d3487 target/ppc: fix cpu_ov setting for 32-bit
A bug was introduced in following commit:

    dc0ad84 target/ppc: update overflow flags for add/sub

As for 32-bit ppc target extracting bit 63 for overflow is not correct.
Made it dependent on TARGET_LOG_BITS. This had broken booting MacOS
9.2.1 image

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-03-14 11:27:23 +11:00
Thomas Huth
f244115cbd target/ppc: Fix wrong number of UAMR register
The SPR UAMR has the number 13, and not 12. (Fortunately it seems like
Linux is not using this register yet - only the privileged version with
number 29 ... that's why nobody noticed this problem yet)

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-14 11:12:10 +11:00
Peter Maydell
5bac3c39c8 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer fixes for 2.9.0-rc1

# gpg: Signature made Mon 13 Mar 2017 11:53:16 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  commit: Implement .bdrv_refresh_filename
  mirror: Implement .bdrv_refresh_filename
  block: Refresh filename after changing backing file
  commit: Implement bdrv_commit_top.bdrv_co_get_block_status
  block: Request block status from *file for BDRV_BLOCK_RAW
  block: Remove check_new_perm from bdrv_replace_child()
  migration: Document handling of bdrv_is_allocated() errors
  vvfat: React to bdrv_is_allocated() errors
  backup: React to bdrv_is_allocated() errors
  block: Drop unmaintained 'archipelago' driver
  file-posix: Consider max_segments for BlockLimits.max_transfer
  backup: allow target without .bdrv_get_info

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-13 15:08:01 +00:00
Peter Maydell
f962709c69 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86: Haswell TSX blacklist fix for 2.9

# gpg: Signature made Fri 10 Mar 2017 18:45:08 GMT
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  i386: Change stepping of Haswell to non-blacklisted value
  i386/kvm: Blacklist TSX on known broken hosts
  i386: host_vendor_fms() helper function

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-13 13:16:35 +00:00
Kevin Wolf
dcbf37ce41 commit: Implement .bdrv_refresh_filename
We want query-block to return the right filename, even if a commit job
put a bdrv_commit_top on top of the actual image format driver. Let
bdrv_commit_top.bdrv_refresh_filename get the filename from its backing
file.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-13 12:49:33 +01:00
Kevin Wolf
fd4a6493bb mirror: Implement .bdrv_refresh_filename
We want query-block to return the right filename, even if a mirror job
put a bdrv_mirror_top on top of the actual image format driver. Let
bdrv_mirror_top.bdrv_refresh_filename get the filename from its backing
file.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-13 12:49:33 +01:00
Kevin Wolf
9e7e940c3d block: Refresh filename after changing backing file
In bdrv_open_inherit(), the filename is refreshed after opening the
backing file, but we neglected to do the same when the backing file
changes later.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-13 12:49:33 +01:00
Kevin Wolf
9196565866 commit: Implement bdrv_commit_top.bdrv_co_get_block_status
In some cases, bdrv_co_get_block_status() is called recursively for the
whole backing chain. The automatically inserted bdrv_commit_top filter
driver must not stop the recursion, so implement a callback that simply
forwards the request to bs->backing.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-13 12:49:33 +01:00
Kevin Wolf
b64aa44195 block: Request block status from *file for BDRV_BLOCK_RAW
This fixes bdrv_co_get_block_status() for the bdrv_mirror_top block
driver, which must fall through to bs->backing instead of bs->file.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-13 12:49:33 +01:00
Kevin Wolf
466787fbca block: Remove check_new_perm from bdrv_replace_child()
All callers pass false now, so the parameter can go away again.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-13 12:49:33 +01:00
Eric Blake
7d66b1fbd2 migration: Document handling of bdrv_is_allocated() errors
Migration is the only code left in the tree that does not react
to bdrv_is_allocated() failures.  But as there is no useful way
to react to the failure, and we are merely skipping unallocated
sectors on success, just document that our choice of handling
is intended.

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-13 12:49:33 +01:00
Eric Blake
6f712ee080 vvfat: React to bdrv_is_allocated() errors
If bdrv_is_allocated() fails, we should react to that failure.
For 2 of the 3 callers, reporting the error was easy.  But in
cluster_was_modified() and its lone caller
get_cluster_count_for_direntry(), it's rather invasive to update
the logic to pass the error back; so there, I went with merely
documenting the issue by changing the return type to bool (in
all likelihood, treating the cluster as modified will then
trigger a read which will also fail, and eventually get to an
error - but given the appalling number of abort() calls in this
code, I'm not making it any worse).

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-13 12:49:33 +01:00
Eric Blake
666a9543fa backup: React to bdrv_is_allocated() errors
If bdrv_is_allocated() fails, we should immediately do the backup
error action, rather than attempting backup_do_cow() (although
that will likely fail too).

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-13 12:49:33 +01:00
Eric Blake
e32ccbc6e9 block: Drop unmaintained 'archipelago' driver
The driver has failed to build since commit da34e65, in qemu 2.6,
due to a missing include of qapi/error.h for error_setg().
Since no one has complained in three releases, it is easier to
remove the dead code than to keep it around, especially since it
is not being built by default and therefore prone to bitrot.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-13 12:49:33 +01:00
Fam Zheng
9103f1ceb4 file-posix: Consider max_segments for BlockLimits.max_transfer
BlockLimits.max_transfer can be too high without this fix, guest will
encounter I/O error or even get paused with werror=stop or rerror=stop. The
cause is explained below.

Linux has a separate limit, /sys/block/.../queue/max_segments, which in
the worst case can be more restrictive than the BLKSECTGET which we
already consider (note that they are two different things). So, the
failure scenario before this patch is:

1) host device has max_sectors_kb = 4096 and max_segments = 64;
2) guest learns max_sectors_kb limit from QEMU, but doesn't know
   max_segments;
3) guest issues e.g. a 512KB request thinking it's okay, but actually
   it's not, because it will be passed through to host device as an
   SG_IO req that has niov > 64;
4) host kernel doesn't like the segmenting of the request, and returns
   -EINVAL;

This patch checks the max_segments sysfs entry for the host device and
calculates a "conservative" bytes limit using the page size, which is
then merged into the existing max_transfer limit. Guest will discover
this from the usual virtual block device interfaces. (In the case of
scsi-generic, it will be done in the INQUIRY reply interception in
device model.)

The other possibility is to actually propagate it as a separate limit,
but it's not better. On the one hand, there is a big complication: the
limit is per-LUN in QEMU PoV (because we can attach LUNs from different
host HBAs to the same virtio-scsi bus), but the channel to communicate
it in a per-LUN manner is missing down the stack; on the other hand,
two limits versus one doesn't change much about the valid size of I/O
(because guest has no control over host segmenting).

Also, the idea to fall back to bounce buffering in QEMU, upon -EINVAL,
was explored. Unfortunately there is no neat way to ensure the bounce
buffer is less segmented (in terms of DMA addr) than the guest buffer.

Practically, this bug is not very common. It is only reported on a
Emulex (lpfc), so it's okay to get it fixed in the easier way.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-13 12:49:33 +01:00
Vladimir Sementsov-Ogievskiy
a410a7f1af backup: allow target without .bdrv_get_info
Currently backup to nbd target is broken, as nbd doesn't have
.bdrv_get_info realization.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-13 12:49:33 +01:00
Peter Maydell
b1616fe0e2 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Fri 10 Mar 2017 07:15:38 GMT
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  docker/dockerfiles/debian-s390-cross: include clang
  tests/docker: support proxy / corporate firewall

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-13 11:26:36 +00:00
Eduardo Habkost
ec56a4a7b0 i386: Change stepping of Haswell to non-blacklisted value
glibc blacklists TSX on Haswell CPUs with model==60 and
stepping < 4. To make the Haswell CPU model more useful, make
those guests actually use TSX by changing CPU stepping to 4.

References:
* glibc commit 2702856bf45c82cf8e69f2064f5aa15c0ceb6359
  https://sourceware.org/git/?p=glibc.git;a=commit;h=2702856bf45c82cf8e69f2064f5aa15c0ceb6359

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170309181212.18864-4-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-03-10 15:01:09 -03:00
Eduardo Habkost
40e80ee411 i386/kvm: Blacklist TSX on known broken hosts
Some Intel CPUs are known to have a broken TSX implementation. A
microcode update from Intel disabled TSX on those CPUs, but
GET_SUPPORTED_CPUID might be reporting it as supported if the
hosts were not updated yet.

Manually fixup the GET_SUPPORTED_CPUID data to ensure we will
never enable TSX when running on those hosts.

Reference:
* glibc commit 2702856bf45c82cf8e69f2064f5aa15c0ceb6359:
  https://sourceware.org/git/?p=glibc.git;a=commit;h=2702856bf45c82cf8e69f2064f5aa15c0ceb6359

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170309181212.18864-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-03-10 15:01:08 -03:00
Eduardo Habkost
20271d4840 i386: host_vendor_fms() helper function
Helper function for code that needs to check the host CPU
vendor/family/model/stepping values.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170309181212.18864-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-03-10 15:01:08 -03:00
Alex Bennée
8ba1e5f72b docker/dockerfiles/debian-s390-cross: include clang
It's a silly little limitation on Shippable that is looks for clang
in the container even though we won't use it. The arm/aarch64 cross
builds inherit this from debian.docker but as we needed to use
debian-testing for this we add it here. We also collapse the update
step into one RUN line to remove and intermediate layer of the docker
build.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20170306112848.659-1-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-03-10 15:05:22 +08:00
Peter Maydell
95b0eca46e Merge remote-tracking branch 'remotes/stsquad/tags/pull-mttcg-fixups-090317-1' into staging
Fix-ups for MTTCG regressions for 2.9

This is the same as v3 posted a few days ago except with a few extra
Reviewed-by tags added.

# gpg: Signature made Thu 09 Mar 2017 10:45:18 GMT
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-mttcg-fixups-090317-1:
  hw/intc/arm_gic: modernise the DPRINTF
  target/arm/helper: make it clear the EC field is also in hex
  target-i386: defer VMEXIT to do_interrupt
  target/mips: hold BQL for timer interrupts
  translate-all: exit cpu_restore_state early if translating
  target/xtensa: hold BQL for interrupt processing
  s390x/misc_helper.c: wrap IO instructions in BQL
  sparc/sparc64: grab BQL before calling cpu_check_irqs
  cpus.c: add additional error_report when !TARGET_SUPPORT_MTTCG
  target/i386/cpu.h: declare TCG_GUEST_DEFAULT_MO
  vl/cpus: be smarter with icount and MTTCG

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-09 18:53:55 +00:00
Peter Maydell
dd4d257821 Merge remote-tracking branch 'remotes/kraxel/tags/pull-fixes-20170309-1' into staging
2.9 bugfixes for ohci and qxl

# gpg: Signature made Thu 09 Mar 2017 09:09:44 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-fixes-20170309-1:
  qxl: clear guest_cursor on QXL_CURSOR_HIDE
  ohci: relax link check

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-09 13:16:05 +00:00
Alex Bennée
68bf93ce9d hw/intc/arm_gic: modernise the DPRINTF
While I was debugging the icount issues I realised a bunch of the
messages look quite similar. I've fixed this by including __func__ in
the debug print. At the same time I move the a modern if (GATE) style
printf which ensures the compiler can check for format string errors
even if the code gets optimised away in the non-DEBUG_GIC case.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-09 10:41:49 +00:00
Alex Bennée
6568da459b target/arm/helper: make it clear the EC field is also in hex
..just like the rest of the displayed ESR register. Otherwise people
might scratch their heads if a not obviously hex number is displayed
for the EC field.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: KONRAD Frederic <fred.konrad@greensocs.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-09 10:41:48 +00:00
Paolo Bonzini
10cde894b6 target-i386: defer VMEXIT to do_interrupt
Paths through the softmmu code during code generation now need to be audited
to check for double locking of tb_lock.  In particular, VMEXIT can take tb_lock
through cpu_vmexit -> cpu_x86_update_cr4 -> tlb_flush.

To avoid this, split VMEXIT delivery in two parts, similar to what is done with
exceptions.  cpu_vmexit only records the VMEXIT exit code and information, and
cc->do_interrupt can then deliver it when it is safe to take the lock.

Reported-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Tested-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-03-09 10:41:48 +00:00
Yongbok Kim
d394698d73 target/mips: hold BQL for timer interrupts
Hold BQL when accessing timer which can cause interrupts

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-03-09 10:41:48 +00:00
Alex Bennée
d8b2239bcd translate-all: exit cpu_restore_state early if translating
The translation code uses cpu_ld*_code which can trigger a tlb_fill
which if it fails will erroneously attempts a fault resolution. This
never works during translation as the TB being generated hasn't been
added yet. The target should have checked retaddr before calling
cpu_restore_state but for those that have yet to be fixed we do it
here to avoid a recursive tb_lock() under MTTCG's new locking regime.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-03-09 10:41:43 +00:00
Alex Bennée
47e2088797 target/xtensa: hold BQL for interrupt processing
Make sure we have the BQL held when processing interrupts.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
2017-03-09 10:41:43 +00:00
Alex Bennée
278f5e98c6 s390x/misc_helper.c: wrap IO instructions in BQL
Helpers that can trigger IO events (including interrupts) need to be
protected by the BQL. I've updated all the helpers that call into an
ioinst_handle_* functions.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-09 10:41:43 +00:00
Alex Bennée
5ee5993001 sparc/sparc64: grab BQL before calling cpu_check_irqs
IRQ modification is part of device emulation and should be done while
the BQL is held to prevent races when MTTCG is enabled. This adds
assertions in the hw emulation layer and wraps the calls from helpers
in the BQL.

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2017-03-09 10:41:38 +00:00
Alex Bennée
c34c762015 cpus.c: add additional error_report when !TARGET_SUPPORT_MTTCG
While we may fail the memory ordering check later that can be
confusing. So in cases where TARGET_SUPPORT_MTTCG has yet to be
defined we should say so specifically.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-09 10:38:16 +00:00
Alex Bennée
72c1701f62 target/i386/cpu.h: declare TCG_GUEST_DEFAULT_MO
This suppresses the incorrect warning when forcing MTTCG for x86
guests on x86 hosts. A future patch will still warn when
TARGET_SUPPORT_MTTCG hasn't been defined for the guest (which is still
pending for x86).

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Acked-by: Eduardo Habkost <ehabkost@redhat.com>
2017-03-09 10:38:02 +00:00
Alex Bennée
83fd9629a3 vl/cpus: be smarter with icount and MTTCG
The sense of the test was inverted. Make it simple, if icount is
enabled then we disabled MTTCG by default. If the user tries to force
MTTCG upon us then we tell them "no".

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-03-09 10:38:02 +00:00
Gerd Hoffmann
dbb5fb8d35 qxl: clear guest_cursor on QXL_CURSOR_HIDE
Make sure we don't leave guest_cursor pointing into nowhere.  This might
lead to (rare) live migration failures, due to target trying to restore
the cursor from the stale pointer.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1421788
Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1488789111-27340-1-git-send-email-kraxel@redhat.com
2017-03-09 09:47:26 +01:00
Gerd Hoffmann
ab6b1105a2 ohci: relax link check
The strict td link limit added by commit "95ed569 usb: ohci: limit the
number of link eds" causes problems with macos guests.  Lets raise the
limit.

Reported-by: Programmingkid <programmingkidx@gmail.com>
Reported-by: Howard Spoelstra <hsp.cat7@gmail.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
Reviewed-by: John Arbuckle <programmingkidx@gmail.com>
Message-id: 1488876018-31576-1-git-send-email-kraxel@redhat.com
2017-03-09 09:46:13 +01:00
Peter Maydell
b64842dee4 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer fixes for 2.9.0-rc0

# gpg: Signature made Tue 07 Mar 2017 14:59:18 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (27 commits)
  commit: Don't use error_abort in commit_start
  block: Don't use error_abort in blk_new_open
  sheepdog: Support blockdev-add
  qapi-schema: Rename SocketAddressFlat's variant tcp to inet
  qapi-schema: Rename GlusterServer to SocketAddressFlat
  gluster: Plug memory leaks in qemu_gluster_parse_json()
  gluster: Don't duplicate qapi-util.c's qapi_enum_parse()
  gluster: Drop assumptions on SocketTransport names
  sheepdog: Implement bdrv_parse_filename()
  sheepdog: Use SocketAddress and socket_connect()
  sheepdog: Report errors in pseudo-filename more usefully
  sheepdog: Don't truncate long VDI name in _open(), _create()
  sheepdog: Fix snapshot ID parsing in _open(), _create, _goto()
  sheepdog: Mark sd_snapshot_delete() lossage FIXME
  sheepdog: Fix error handling sd_create()
  sheepdog: Fix error handling in sd_snapshot_delete()
  sheepdog: Defuse time bomb in sd_open() error handling
  block: Fix error handling in bdrv_replace_in_backing_chain()
  block: Handle permission errors in change_parent_backing_link()
  block: Ignore multiple children in bdrv_check_update_perm()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-08 09:47:52 +00:00
Peter Maydell
87467097f8 Merge remote-tracking branch 'remotes/armbru/tags/pull-block-2017-02-28-v4' into staging
block: Command line option -blockdev

# gpg: Signature made Tue 07 Mar 2017 15:07:59 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-block-2017-02-28-v4: (24 commits)
  keyval: Support lists
  docs/qapi-code-gen.txt: Clarify naming rules
  qapi: Improve how keyval input visitor reports unexpected dicts
  block: Initial implementation of -blockdev
  qapi: New qobject_input_visitor_new_str() for convenience
  keyval: Restrict key components to valid QAPI names
  qapi: New parse_qapi_name()
  test-qapi-util: New, covering qapi/qapi-util.c
  monitor: Assert qmp_schema_json[] is sane
  test-visitor-serialization: Pass &error_abort to qobject_from_json()
  check-qjson: Test errors from qobject_from_json()
  block: More detailed syntax error reporting for JSON filenames
  qobject: Propagate parse errors through qobject_from_json()
  test-qobject-input-visitor: Abort earlier on bad test input
  qjson: Abort earlier on qobject_from_jsonf() misuse
  libqtest: Fix qmp() & friends to abort on JSON parse errors
  qobject: Propagate parse errors through qobject_from_jsonv()
  qapi: Factor out common qobject_input_get_keyval()
  qapi: Factor out common part of qobject input visitor creation
  test-keyval: Cover use with qobject input visitor
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-07 17:06:48 +00:00
Markus Armbruster
0b2c1beea4 keyval: Support lists
Additionally permit non-negative integers as key components.  A
dictionary's keys must either be all integers or none.  If all keys
are integers, convert the dictionary to a list.  The set of keys must
be [0,N].

Examples:

* list.1=goner,list.0=null,list.1=eins,list.2=zwei
  is equivalent to JSON [ "null", "eins", "zwei" ]

* a.b.c=1,a.b.0=2
  is inconsistent: a.b.c clashes with a.b.0

* list.0=null,list.2=eins,list.2=zwei
  has a hole: list.1 is missing

Similar design flaw as for objects: there is no way to denote an empty
list.  While interpreting "key absent" as empty list seems natural
(removing a list member from the input string works when there are
multiple ones, so why not when there's just one), it doesn't work:
"key absent" already means "optional list absent", which isn't the
same as "empty list present".

Update the keyval object visitor to use this a.0 syntax in error
messages rather than the usual a[0].

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488317230-26248-25-git-send-email-armbru@redhat.com>
[Off-by-one fix squashed in, as per Kevin's review]
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 16:07:48 +01:00
Markus Armbruster
79f7598164 docs/qapi-code-gen.txt: Clarify naming rules
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-24-git-send-email-armbru@redhat.com>
2017-03-07 16:07:48 +01:00
Markus Armbruster
31478f26ab qapi: Improve how keyval input visitor reports unexpected dicts
Incorrect option

    -blockdev node-name=foo,driver=file,filename=foo.img,aio.unmap=on

is rejected with "Invalid parameter type for 'aio', expected: string".
To make sense of this, you almost have to translate it into the
equivalent QMP command

    { "execute": "blockdev-add", "arguments": { "node-name": "foo", "driver": "file", "filename": "foo.img", "aio": { "unmap": true } } }

Improve the error message to "Parameters 'aio.*' are unexpected".
Take care not to confuse the case "unexpected nested parameters"
(i.e. the object is a QDict or QList) with the case "non-string scalar
parameter".  The latter is a misuse of the visitor, and should perhaps
be an assertion.  Note that test-qobject-input-visitor exercises this
misuse in test_visitor_in_int_keyval(), test_visitor_in_bool_keyval()
and test_visitor_in_number_keyval().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-23-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
42e5f39378 block: Initial implementation of -blockdev
The new command line option -blockdev works like QMP command
blockdev-add.

The option argument may be given in JSON syntax, exactly as in QMP.
Example usage:

    -blockdev '{"node-name": "foo", "driver": "raw", "file": {"driver": "file", "filename": "foo.img"} }'

The JSON argument doesn't exactly blend into the existing option
syntax, so the traditional KEY=VALUE,... syntax is also supported,
using dotted keys to do the nesting:

    -blockdev node-name=foo,driver=raw,file.driver=file,file.filename=foo.img

This does not yet support lists, but that will be addressed shortly.

Note that calling qmp_blockdev_add() (say via qmp_marshal_block_add())
right away would crash.  We need to stash the configuration for later
instead.  This is crudely done, and bypasses QemuOpts, even though
storing configuration is what QemuOpts is for.  Need to revamp option
infrastructure to support QAPI types like BlockdevOptions.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488317230-26248-22-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
9d1eab4b95 qapi: New qobject_input_visitor_new_str() for convenience
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488317230-26248-21-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
f740048323 keyval: Restrict key components to valid QAPI names
Until now, key components are separated by '.'.  This leaves little
room for evolving the syntax, and is incompatible with the __RFQDN_
prefix convention for downstream extensions.

Since key components will be commonly used as QAPI member names by the
QObject input visitor, we can just as well borrow the QAPI naming
rules here: letters, digits, hyphen and period starting with a letter,
with an optional __RFQDN_ prefix for downstream extensions.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-20-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
069b64e3fe qapi: New parse_qapi_name()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488317230-26248-19-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
6c873d1149 test-qapi-util: New, covering qapi/qapi-util.c
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-18-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
b0f36b23f1 monitor: Assert qmp_schema_json[] is sane
qmp_query_qmp_schema() parses qmp_schema_json[] with
qobject_from_json().  This must not fail, so pass &error_abort.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-17-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
02146d27c3 test-visitor-serialization: Pass &error_abort to qobject_from_json()
qmp_deserialize() calls qobject_from_json() ignoring errors.  It
passes the result to qobject_input_visitor_new(), which asserts it's
not null.  Therefore, we can just as well pass &error_abort to
qobject_from_json().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-16-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
aec4b054ea check-qjson: Test errors from qobject_from_json()
Pass &error_abort with known-good input.  Else pass &err and check
what comes back.  This demonstrates that the parser fails silently for
many errors.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-15-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
5577fff738 block: More detailed syntax error reporting for JSON filenames
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-14-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
57348c2f18 qobject: Propagate parse errors through qobject_from_json()
The next few commits will put the errors to use where appropriate.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488317230-26248-13-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
bff17e84a9 test-qobject-input-visitor: Abort earlier on bad test input
visitor_input_test_init_internal() parses test input with
qobject_from_jsonv(), and asserts it succeeds.  Pass &error_abort for
good measure.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-12-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
ea5ef5c80b qjson: Abort earlier on qobject_from_jsonf() misuse
Ignoring errors first, then asserting success is suboptimal.  Pass
&error_abort instead, so we abort earlier, and hopefully get more
useful clues on what's wrong.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-11-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
53f991520e libqtest: Fix qmp() & friends to abort on JSON parse errors
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-10-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
99dbfd1db1 qobject: Propagate parse errors through qobject_from_jsonv()
The next few commits will put the errors to use where appropriate.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-9-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
e3934b4297 qapi: Factor out common qobject_input_get_keyval()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488317230-26248-8-git-send-email-armbru@redhat.com>
2017-03-07 16:07:47 +01:00
Markus Armbruster
abe81bc21a qapi: Factor out common part of qobject input visitor creation
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-7-git-send-email-armbru@redhat.com>
2017-03-07 16:07:46 +01:00
Markus Armbruster
9e3943f883 test-keyval: Cover use with qobject input visitor
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-6-git-send-email-armbru@redhat.com>
2017-03-07 16:07:46 +01:00
Daniel P. Berrange
cbd8acf38f qapi: qobject input visitor variant for use with keyval_parse()
Currently the QObjectInputVisitor assumes that all scalar values are
directly represented as the final types declared by the thing being
visited. i.e. it assumes an 'int' is using QInt, and a 'bool' is using
QBool, etc.  This is good when QObjectInputVisitor is fed a QObject
that came from a JSON document on the QMP monitor, as it will strictly
validate correctness.

To allow QObjectInputVisitor to be reused for visiting a QObject
originating from keyval_parse(), an alternative mode is needed where
all the scalars types are represented as QString and converted on the
fly to the final desired type.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1475246744-29302-8-git-send-email-berrange@redhat.com>

Rebased, conflicts resolved, commit message updated to refer to
keyval_parse().  autocast replaced by keyval in identifiers,
noautocast replaced by fail in tests.

Fix qobject_input_type_uint64_keyval() not to reject '-', for QemuOpts
compatibility: replace parse_uint_full() by open-coded
parse_option_number().  The next commit will add suitable tests.
Leave out the fancy ERANGE error reporting for now, but add a TODO
comment.  Add it qobject_input_type_int64_keyval() and
qobject_input_type_number_keyval(), too.

Open code parse_option_bool() and parse_option_size() so we have to
call qobject_input_get_name() only when actually needed.  Again, leave
out ERANGE error reporting for now.

QAPI/QMP downstream extension prefixes __RFQDN_ don't work, because
keyval_parse() splits them at '.'.  This will be addressed later in
the series.

qobject_input_type_int64_keyval(), qobject_input_type_uint64_keyval(),
qobject_input_type_number_keyval() tweaked for style.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-5-git-send-email-armbru@redhat.com>
2017-03-07 16:07:46 +01:00
Markus Armbruster
d454dbe0ee keyval: New keyval_parse()
keyval_parse() parses KEY=VALUE,... into a QDict.  Works like
qemu_opts_parse(), except:

* Returns a QDict instead of a QemuOpts (d'oh).

* Supports nesting, unlike QemuOpts: a KEY is split into key
  fragments at '.' (dotted key convention; the block layer does
  something similar on top of QemuOpts).  The key fragments are QDict
  keys, and the last one's value is updated to VALUE.

* Each key fragment may be up to 127 bytes long.  qemu_opts_parse()
  limits the entire key to 127 bytes.

* Overlong key fragments are rejected.  qemu_opts_parse() silently
  truncates them.

* Empty key fragments are rejected.  qemu_opts_parse() happily
  accepts empty keys.

* It does not store the returned value.  qemu_opts_parse() stores it
  in the QemuOptsList.

* It does not treat parameter "id" specially.  qemu_opts_parse()
  ignores all but the first "id", and fails when its value isn't
  id_wellformed(), or duplicate (a QemuOpts with the same ID is
  already stored).  It also screws up when a value contains ",id=".

* Implied value is not supported.  qemu_opts_parse() desugars "foo" to
  "foo=on", and "nofoo" to "foo=off".

* An implied key's value can't be empty, and can't contain ','.

I intend to grow this into a saner replacement for QemuOpts.  It'll
take time, though.

Note: keyval_parse() provides no way to do lists, and its key syntax
is incompatible with the __RFQDN_ prefix convention for downstream
extensions, because it blindly splits at '.', even in __RFQDN_.  Both
issues will be addressed later in the series.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488317230-26248-4-git-send-email-armbru@redhat.com>
2017-03-07 16:07:46 +01:00
Markus Armbruster
112c944655 tests: Fix gcov-files-test-qemu-opts-y, gcov-files-test-logging-y
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-3-git-send-email-armbru@redhat.com>
2017-03-07 16:07:46 +01:00
Markus Armbruster
0e2052b260 test-qemu-opts: Cover qemu_opts_parse() of "no"
qemu_opts_parse() interprets "no" as negated empty key.  Consistent
with its acceptance of empty keys elsewhere, whatever that's worth.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <1488317230-26248-2-git-send-email-armbru@redhat.com>
2017-03-07 16:07:46 +01:00
Peter Maydell
43c227f9dd disas/arm: Avoid unintended sign extension
When assembling 'given' from the instruction bytes, C's integer
promotion rules mean we may promote an unsigned char to a signed
integer before shifting it, and then sign extend to a 64-bit long,
which can set the high bits of the long.  The code doesn't in fact
care about the high bits if the long is 64 bits, but this is
surprising, so don't do it.

(Spotted by Coverity, CID 1005404.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488556233-31246-7-git-send-email-peter.maydell@linaro.org
2017-03-07 14:33:51 +00:00
Peter Maydell
001ebaca7b disas/cris: Avoid unintended sign extension
In the cris disassembler we were using 'unsigned long' to calculate
addresses which are supposed to be 32 bits.  This meant that we might
accidentally sign extend or calculate a value that was outside the 32
bit range of the guest CPU.  Use 'uint32_t' instead so we give the
right answers on 64-bit hosts.

(Spotted by Coverity, CID 1005402, 1005403.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1488556233-31246-6-git-send-email-peter.maydell@linaro.org
2017-03-07 14:33:51 +00:00
Peter Maydell
1d153a3388 disas/microblaze: Avoid unintended sign extension
In read_insn_microblaze() we assemble 4 bytes into an 'unsigned
long'.  If 'unsigned long' is 64 bits and the high byte has its top
bit set, then C's implicit conversion from 'unsigned char' to 'int'
for the shift will result in an unintended sign extension which sets
the top 32 bits in 'inst'.  Add casts to prevent this.  (Spotted by
Coverity, CID 1005401.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1488556233-31246-5-git-send-email-peter.maydell@linaro.org
2017-03-07 14:33:51 +00:00
Peter Maydell
2e3883d03d disas/m68k: Avoid unintended sign extension in get_field()
In get_field(), we take an 'unsigned char' value and shift it left,
which implicitly promotes it to 'signed int', before ORing it into an
'unsigned long' type.  If 'unsigned long' is 64 bits then this will
result in a sign extension and the top 32 bits of the result will be
1s.  Add explicit casts to unsigned long before shifting to prevent
this.

(Spotted by Coverity, CID 715697.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-id: 1488556233-31246-4-git-send-email-peter.maydell@linaro.org
2017-03-07 14:33:51 +00:00
Peter Maydell
3f168b5d35 disas/i386: Avoid NULL pointer dereference in error case
In a code path where we hit an internal disassembler error, execution
would subsequently attempt to dereference a NULL pointer.  This
should never happen, but avoid the crash.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488556233-31246-3-git-send-email-peter.maydell@linaro.org
2017-03-07 14:33:51 +00:00
Peter Maydell
6815a8a00a disas/hppa: Remove dead code
Coverity complains (CID 1302705) that the "fr0" part of the ?: in
fput_fp_reg_r() is dead.  This looks like cut-n-paste error from
fput_fp_reg(); delete the dead code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1488556233-31246-2-git-send-email-peter.maydell@linaro.org
2017-03-07 14:33:51 +00:00
Fam Zheng
b69f00dde4 commit: Don't use error_abort in commit_start
bdrv_set_backing_hd failure needn't be abort. Since we already have
error parameter, use it.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:29 +01:00
Fam Zheng
50bfbe93b2 block: Don't use error_abort in blk_new_open
We have an errp and bdrv_root_attach_child can fail permission check,
error_abort is not the best choice here.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:29 +01:00
Markus Armbruster
d282f34e82 sheepdog: Support blockdev-add
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:29 +01:00
Markus Armbruster
c5f1ae3ae7 qapi-schema: Rename SocketAddressFlat's variant tcp to inet
QAPI type SocketAddressFlat differs from SocketAddress pointlessly:
the discriminator value for variant InetSocketAddress is 'tcp' instead
of 'inet'.  Rename.

The type is so far only used by the Gluster block drivers.  Take care
to keep 'tcp' working in things like -drive's file.server.0.type=tcp.
The "gluster+tcp" URI scheme in pseudo-filenames stays the same.
blockdev-add changes, but it has changed incompatibly since 2.8
already.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:29 +01:00
Markus Armbruster
2b733709d7 qapi-schema: Rename GlusterServer to SocketAddressFlat
As its documentation says, it's not specific to Gluster.  Rename it,
as I'm going to use it for something else.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:29 +01:00
Markus Armbruster
85a82e852d gluster: Plug memory leaks in qemu_gluster_parse_json()
To reproduce, run

    $ valgrind qemu-system-x86_64 --nodefaults -S --drive driver=gluster,volume=testvol,path=/a/b/c,server.0.type=xxx

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:29 +01:00
Markus Armbruster
e152ef7a98 gluster: Don't duplicate qapi-util.c's qapi_enum_parse()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:29 +01:00
Markus Armbruster
fc29458dee gluster: Drop assumptions on SocketTransport names
qemu_gluster_glfs_init() passes the names of QAPI enumeration type
SocketTransport to glfs_set_volfile_server().  Works, because they
were chosen to match.  But the coupling is artificial.  Use the
appropriate literal strings instead.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
831acdc95e sheepdog: Implement bdrv_parse_filename()
This permits configuration with driver-specific options in addition to
pseudo-filename parsed as URI.  For instance,

    --drive driver=sheepdog,host=fido,vdi=dolly

instead of

    --drive driver=sheepdog,file=sheepdog://fido/dolly

It's also a first step towards supporting blockdev-add.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
8ecc2f9eab sheepdog: Use SocketAddress and socket_connect()
sd_parse_uri() builds a string from host and port parts for
inet_connect().  inet_connect() parses it into host, port and options.
Whether this gets exactly the same host, port and no options for all
inputs is not obvious.

Cut out the string middleman and build a SocketAddress for
socket_connect() instead.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
36bcac16fd sheepdog: Report errors in pseudo-filename more usefully
Errors in the pseudo-filename are all reported with the same laconic
"Can't parse filename" message.

Add real error reporting, such as:

    $ qemu-system-x86_64 --drive driver=sheepdog,filename=sheepdog:///
    qemu-system-x86_64: --drive driver=sheepdog,filename=sheepdog:///: missing file path in URI
    $ qemu-system-x86_64 --drive driver=sheepdog,filename=sheepgod:///vdi
    qemu-system-x86_64: --drive driver=sheepdog,filename=sheepgod:///vdi: URI scheme must be 'sheepdog', 'sheepdog+tcp', or 'sheepdog+unix'
    $ qemu-system-x86_64 --drive driver=sheepdog,filename=sheepdog+unix:///vdi?socke=sheepdog.sock
    qemu-system-x86_64: --drive driver=sheepdog,filename=sheepdog+unix:///vdi?socke=sheepdog.sock: unexpected query parameters

The code to translate legacy syntax to URI fails to escape URI
meta-characters.  The new error messages are misleading then.  Replace
them by the old "Can't parse filename" message.  "Internal error"
would be more honest.  Anyway, no worse than before.  Also add a FIXME
comment.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
daa0b0d4b1 sheepdog: Don't truncate long VDI name in _open(), _create()
sd_parse_uri() truncates long VDI names silently.  Reject them
instead.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
89e2a31d33 sheepdog: Fix snapshot ID parsing in _open(), _create, _goto()
sd_parse_uri() and sd_snapshot_goto() screw up error checking after
strtoul(), and truncate long tag names silently.  Fix by replacing
those parts by new sd_parse_snapid_or_tag(), which checks more
carefully.

sd_snapshot_delete() also parses snapshot IDs, but is currently too
broken for me to touch.  Mark TODO.

Two calls of strtol() without error checking remain in
parse_redundancy().  Mark them FIXME.

More silent truncation of configuration strings remains elsewhere.
Not marked.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
a0dc0e2bfe sheepdog: Mark sd_snapshot_delete() lossage FIXME
sd_snapshot_delete() should delete the snapshot whose ID matches
@snapshot_id and whose name matches @name.  But that's not what it
does.  If @snapshot_id is a valid ID, it deletes the snapshot with
that ID, else it deletes the snapshot with that name.  It doesn't use
@name at all.  Add suitable FIXME comments, so someone who actually
knows Sheepdog can fix it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
48d7c4af06 sheepdog: Fix error handling sd_create()
As a bdrv_create() method, sd_create() must set an error and return
negative errno on failure.  It prints the error instead of setting it
when connect_to_sdog() fails.  Fix that.

While there, return the value of connect_to_sdog() like we do
elsewhere, instead of -EIO.  No functional change, as
connect_to_sdog() returns no other error code.

Many more suspicious uses of error_report() and error_report_err()
remain in other functions.  Left for another day.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
e25cad6921 sheepdog: Fix error handling in sd_snapshot_delete()
As a bdrv_snapshot_delete() method, sd_snapshot_delete() must set an
error and return negative errno on failure.  It sometimes returns -1,
and sometimes neglects to set an error.  It also prints error messages
with error_report().  Fix all that.

Moreover, its handling of an attempt to delete a nonexistent snapshot
is wrong: it error_report()s and succeeds.  Fix it to set an error and
return -ENOENT instead.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Markus Armbruster
cbc488ee2a sheepdog: Defuse time bomb in sd_open() error handling
When qemu_opts_absorb_qdict() fails, sd_open() closes stdin, because
sd->fd is still zero.  Fortunately, qemu_opts_absorb_qdict() can't
fail, because:

1. it only fails when qemu_opt_parse() fails, and
2. the only member of runtime_opts.desc[] is a QEMU_OPT_STRING, and
3. qemu_opt_parse() can't fail for QEMU_OPT_STRING.

Defuse this ticking time bomb by jumping behind the file descriptor
cleanup on error.

Also do that for the error paths where sd->fd is still -1.  The file
descriptor cleanup happens to do nothing then, but let's not rely on
that here.

While there, rename label out to err, because it's on the error path,
not the normal path out of the function.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
5fe31c25cc block: Fix error handling in bdrv_replace_in_backing_chain()
When adding an Error parameter, bdrv_replace_in_backing_chain() would
become nothing more than a wrapper around change_parent_backing_link().
So make the latter public, renamed as bdrv_replace_node(), and remove
bdrv_replace_in_backing_chain().

Most of the callers just remove a node from the graph that they just
inserted, so they can use &error_abort, but completion of a mirror job
with 'replaces' set can actually fail.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
234ac1a902 block: Handle permission errors in change_parent_backing_link()
Instead of just trying to change parents by parent over to reference @to
instead of @from, and abort()ing whenever the permissions don't allow
this, do proper permission checking beforehand and pass any error to the
callers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
46181129ea block: Ignore multiple children in bdrv_check_update_perm()
change_parent_backing_link() will need to update multiple BdrvChild
objects at once. Checking permissions reference by reference doesn't
work because permissions need to be consistent only with all parents
moved to the new child.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
8ee039951d block: Factor out bdrv_replace_child_noperm()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
d0ac038025 block: Factor out should_update_child()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-07 14:53:28 +01:00
Kevin Wolf
067acf28d1 block: Fix blockdev-snapshot error handling
For blockdev-snapshot, external_snapshot_prepare() accepts an arbitrary
node reference at first and only checks later whether it already has a
backing file. Between those places, other errors can occur.

Therefore checking in external_snapshot_abort() whether state->new_bs
has a backing file is not sufficient to tell whether bdrv_append() was
already completed or not. Trying to undo the bdrv_append() when it
wasn't even executed is wrong.

Introduce a new boolean flag in the state to fix this.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
88f9d1b3d2 mirror: Fix error path for dirty bitmap creation
mirror_top_bs must be removed from the graph again when creating the
dirty bitmap fails.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
0bf74767ff mirror: Fix permissions for removing mirror_top_bs
mirror_top_bs takes write permissions on its backing file, which can
make it impossible to attach that backing file node to another parent.
However, this is exactly what needs to be done in order to remove
mirror_top_bs from the backing chain. So give up the write permission
first.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
7d9fcb391c mirror: Fix permission problem with 'replaces'
The 'replaces' option of drive-mirror can be used to mirror a Quorum
node to a new image and then let the target image replace one of the
Quorum children. In order for this graph modification to succeed, the
mirror job needs to lift its restrictions on the target node first
before actually replacing the child.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-07 14:53:28 +01:00
Kevin Wolf
b247767aac commit: Fix error handling
Apparently some kind of mismerge happened in commit 8dfba279, which
broke the error handling without any real reason by removing the
assignment of the return value to ret in a blk_insert_bs() call.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-07 14:53:28 +01:00
Philippe Mathieu-Daudé
06cc355171 tests/docker: support proxy / corporate firewall
if ftp_proxy/http_proxy/https_proxy standard environment variables available,
pass them to the docker daemon to build images.
this is required when building behind corporate proxy/firewall, but also help
when using local cache server (ie: apt/yum).

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170306205520.32311-1-f4bug@amsat.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-03-07 18:20:40 +08:00
Peter Maydell
ff79d5e939 Merge remote-tracking branch 'remotes/xtensa/tags/20170306-xtensa' into staging
target/xtensa updates:

- instantiate local memories in xtensa sim machine;
- add two missing include files to xtensa core importing script.

# gpg: Signature made Mon 06 Mar 2017 22:32:45 GMT
# gpg:                using RSA key 0x51F9CC91F83FA044
# gpg: Good signature from "Max Filippov <filippov@cadence.com>"
# gpg:                 aka "Max Filippov <max.filippov@cogentembedded.com>"
# gpg:                 aka "Max Filippov <jcmvbkbc@gmail.com>"
# Primary key fingerprint: 2B67 854B 98E5 327D CDEB  17D8 51F9 CC91 F83F A044

* remotes/xtensa/tags/20170306-xtensa:
  target/xtensa: add two missing headers to core import script
  target/xtensa: sim: instantiate local memories

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-07 09:57:14 +00:00
Peter Maydell
d6780c8221 Merge remote-tracking branch 'remotes/gkurz/tags/fixes-for-2.9' into staging
Fixes issues that got merged with the latest pull request:
- missing O_NOFOLLOW flag for CVE-2016-960
- build break with older glibc that don't have O_PATH and AT_EMPTY_PATH
- various bugs reported by Coverity

# gpg: Signature made Mon 06 Mar 2017 17:51:29 GMT
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/fixes-for-2.9:
  9pfs: fix vulnerability in openat_dir() and local_unlinkat_common()
  9pfs: fix O_PATH build break with older glibc versions
  9pfs: don't use AT_EMPTY_PATH in local_set_cred_passthrough()
  9pfs: fail local_statfs() earlier
  9pfs: fix fd leak in local_opendir()
  9pfs: fix bogus fd check in local_remove()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-07 09:09:53 +00:00
Peter Maydell
7dc3bc7a04 Merge remote-tracking branch 'remotes/mdroth/tags/qga-pull-2017-03-06-tag' into staging
qemu-ga patch queue for 2.9

* fix fsfreeze for filesystems mounted in multiple locations
* fix test failure when running in a chroot
* support for socket-based activation

# gpg: Signature made Mon 06 Mar 2017 07:54:17 GMT
# gpg:                using RSA key 0x3353C9CEF108B584
# gpg: Good signature from "Michael Roth <flukshun@gmail.com>"
# gpg:                 aka "Michael Roth <mdroth@utexas.edu>"
# gpg:                 aka "Michael Roth <mdroth@linux.vnet.ibm.com>"
# Primary key fingerprint: CEAC C9E1 5534 EBAB B82D  3FA0 3353 C9CE F108 B584

* remotes/mdroth/tags/qga-pull-2017-03-06-tag:
  tests: check path to avoid a failing qga/get-vcpus test
  qga: ignore EBUSY when freezing a filesystem
  qga: add systemd socket activation support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-07 07:32:28 +00:00
Greg Kurz
b003fc0d8a 9pfs: fix vulnerability in openat_dir() and local_unlinkat_common()
We should pass O_NOFOLLOW otherwise openat() will follow symlinks and make
QEMU vulnerable.

While here, we also fix local_unlinkat_common() to use openat_dir() for
the same reasons (it was a leftover in the original patchset actually).

This fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-06 17:34:01 +01:00
Greg Kurz
918112c02a 9pfs: fix O_PATH build break with older glibc versions
When O_PATH is used with O_DIRECTORY, it only acts as an optimization: the
openat() syscall simply finds the name in the VFS, and doesn't trigger the
underlying filesystem.

On systems that don't define O_PATH, because they have glibc version 2.13
or older for example, we can safely omit it. We don't want to deactivate
O_PATH globally though, in case it is used without O_DIRECTORY. The is done
with a dedicated macro.

Systems without O_PATH may thus fail to resolve names that involve
unreadable directories, compared to newer systems succeeding, but such
corner case failure is our only option on those older systems to avoid
the security hole of chasing symlinks inappropriately.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
(added last paragraph to changelog as suggested by Eric Blake)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-03-06 17:34:01 +01:00
Greg Kurz
b314f6a077 9pfs: don't use AT_EMPTY_PATH in local_set_cred_passthrough()
The name argument can never be an empty string, and dirfd always point to
the containing directory of the file name. AT_EMPTY_PATH is hence useless
here. Also it breaks build with glibc version 2.13 and older.

It is actually an oversight of a previous tentative patch to implement this
function. We can safely drop it.

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Greg Kurz <groug@kaod.org>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-06 17:34:01 +01:00
Greg Kurz
23da0145cc 9pfs: fail local_statfs() earlier
If we cannot open the given path, we can return right away instead of
passing -1 to fstatfs() and close(). This will make Coverity happy.

(Coverity issue CID1371729)

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel P. berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-06 17:34:01 +01:00
Greg Kurz
faab207f11 9pfs: fix fd leak in local_opendir()
Coverity issue CID1371731

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-06 17:34:01 +01:00
Greg Kurz
b7361d46e7 9pfs: fix bogus fd check in local_remove()
This was spotted by Coverity as a fd leak. This is certainly true, but also
local_remove() would always return without doing anything, unless the fd is
zero, which is very unlikely.

(Coverity issue CID1371732)

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-06 17:34:01 +01:00
Peter Maydell
eba44e9339 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Mon 06 Mar 2017 04:15:17 GMT
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  net/filter-mirror: Follow CODING_STYLE
  COLO-compare: Fix icmp and udp compare different packet always dump bug
  COLO-compare: Optimize compare_common and compare_tcp
  COLO-compare: Rename compare function and remove duplicate codes
  filter-rewriter: skip net_checksum_calculate() while offset = 0
  net/colo: fix memory double free error
  vmxnet3: VMStatify rx/tx q_descr and int_state
  vmxnet3: Convert ring values to uint32_t's
  net/colo-compare: Fix memory free error
  colo-compare: Fix removing fds been watched incorrectly in finalization
  char: remove the right fd been watched in qemu_chr_fe_set_handlers()
  colo-compare: kick compare thread to exit after some cleanup in finalization
  colo-compare: use g_timeout_source_new() to process the stale packets
  NetRxPkt: Remove code duplication in net_rx_pkt_pull_data()
  NetRxPkt: Account buffer with ETH header in IOV length
  NetRxPkt: Do not try to pull more data than present
  NetRxPkt: Fix memory corruption on VLAN header stripping
  eth: Extend vlan stripping functions
  net: Remove useless local var pkt

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-06 15:13:23 +00:00
Peter Maydell
56b51708e9 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170306' into staging
ppc patch queue for 2017-03-06

Looks like my previous batch wasn't quite the last before hard freeze.
This has a handful of bugfixes to go in.  They're all genuine
bugfixes, though not regressions in some cases.

# gpg: Signature made Mon 06 Mar 2017 04:07:48 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170306:
  target/ppc: use helper for excp handling
  target/ppc: fmadd: add macro for updating flags
  target/ppc: fmadd check for excp independently
  spapr: ensure that all threads within core are on the same NUMA node
  ppc/xics: register reset handlers for the ICP and ICS objects

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-06 13:06:30 +00:00
Peter Maydell
fbddc2e560 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-28' into staging
QAPI patches for 2017-02-28

# gpg: Signature made Sun 05 Mar 2017 08:21:51 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-28: (27 commits)
  qapi: Improve qobject visitor documentation
  qapi: Fix object input visit beyond end of list
  tests: Cover input visit beyond end of list
  qapi: Make input visitors detect unvisited list tails
  test-qobject-input-visitor: Cover missing nested struct member
  tests: Cover partial input visit of list
  test-string-input-visitor: Improve list coverage
  test-string-input-visitor: Tear down existing test automatically
  tests-qobject-input-strict: Merge into test-qobject-input-visitor
  qapi: Drop unused non-strict qobject input visitor
  test-qobject-input-visitor: Use strict visitor
  qom: Make object_property_set_qobject()'s input visitor strict
  qapi: Make string input and opts visitor require non-null input
  qapi: Drop string input visitor method optional()
  qapi: Improve qobject input visitor error reporting
  qapi: Make QObject input visitor set *list reliably
  qapi: Clean up after commit 3d344c2
  qapi: Improve a QObject input visitor error message
  qmp: Eliminate silly QERR_QMP_* macros
  qmp: Drop duplicated QMP command object checks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-06 10:18:33 +00:00
Bruce Rogers
ec72c0e271 tests: check path to avoid a failing qga/get-vcpus test
The qga/get-vcpus test fails in a simple chroot environment, as
used in an openSUSE Build Service local build, so first check
that the sysfs based path exists in order to avoid calling this
test in an environment where it won't work right.

Signed-off-by: Bruce Rogers <brogers@suse.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-03-06 00:54:19 -06:00
Peter Lieven
ce2eb6c4a0 qga: ignore EBUSY when freezing a filesystem
the current implementation fails if we try to freeze an
already frozen filesystem. This can happen if a filesystem
is mounted more than once (e.g. with a bind mount).

Suggested-by: Christian Theune <ct@flyingcircus.io>
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-03-06 00:54:18 -06:00
Stefan Hajnoczi
26de229657 qga: add systemd socket activation support
AF_UNIX and AF_VSOCK listen sockets can be passed in by systemd on
startup.  This allows systemd to manage the listen socket until the
first client connects and between restarts.  Advantages of socket
activation are that parallel startup of network services becomes
possible and that unused daemons do not consume memory.

The key to achieving this is the LISTEN_FDS environment variable, which
is a stable ABI as shown here:
https://www.freedesktop.org/wiki/Software/systemd/InterfacePortabilityAndStabilityChart/

We could link against libsystemd and use sd_listen_fds(3) but it's easy
to implement the tiny LISTEN_FDS ABI so that qemu-ga does not depend on
libsystemd.  Some systems may not have systemd installed and wish to
avoid the dependency.  Other init systems or socket activation servers
may implement the same ABI without systemd involvement.

Test as follows:

  $ cat ~/.config/systemd/user/qga.service
  [Unit]
  Description=qga

  [Service]
  WorkingDirectory=/tmp
  ExecStart=/path/to/qemu-ga --logfile=/tmp/qga.log --pidfile=/tmp/qga.pid --statedir=/tmp

  $ cat ~/.config/systemd/user/qga.socket
  [Socket]
  ListenStream=/tmp/qga.sock

  [Install]
  WantedBy=default.target

  $ systemctl --user daemon-reload
  $ systemctl --user start qga.socket
  $ nc -U /tmp/qga.sock

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-03-06 00:54:18 -06:00
Zhang Chen
f0aabd5c4a net/filter-mirror: Follow CODING_STYLE
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Zhang Chen
1723a7f7cf COLO-compare: Fix icmp and udp compare different packet always dump bug
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Zhang Chen
6efeb3286d COLO-compare: Optimize compare_common and compare_tcp
Add offset args for colo_packet_compare_common, optimize
colo_packet_compare_icmp() and colo_packet_compare_udp()
just compare the IP payload. Before compare all tcp packet,
we compare tcp checksum firstly, this function can get
better performance.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Zhang Chen
2ad7ca4c81 COLO-compare: Rename compare function and remove duplicate codes
Rename colo_packet_compare() to colo_packet_compare_common() that
make tcp_compare udp_compare icmp_compare reuse this function.
Remove minimum packet size check in icmp_compare, because we have
check this in parse_packet_early().

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
zhanghailiang
db0a762e4b filter-rewriter: skip net_checksum_calculate() while offset = 0
While the offset of packets's sequence for primary side and
secondary side is zero, it is unnecessary to call net_checksum_calculate()
to recalculate the checksume value of packets.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
zhanghailiang
0e79668e1f net/colo: fix memory double free error
The 'primary_list' and 'secondary_list' members of struct Connection
is not allocated through dynamically g_queue_new(), but we free it by using
g_queue_free(), which will lead to a double-free bug.

Reviewed-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Dr. David Alan Gilbert
a11f5cb005 vmxnet3: VMStatify rx/tx q_descr and int_state
Fairly simple mechanical conversion of all fields.

TODO!!!!
The problem is vmxnet3-ring size/cell_size/next are declared as size_t
but written as 32bit.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Dr. David Alan Gilbert
5504bba1fb vmxnet3: Convert ring values to uint32_t's
The index's in the Vmxnet3Ring were migrated as 32bit ints
yet are declared as size_t's.  They appear to be derived
from 32bit values loaded from guest memory, so actually
store them as that.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Zhang Chen
727c2d764f net/colo-compare: Fix memory free error
We use g_queue_init() to init s->conn_list, so we should use g_queue_clear()
to instead of g_queue_free().

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
zhanghailiang
b43decb015 colo-compare: Fix removing fds been watched incorrectly in finalization
We will catch the bellow error report while try to delete compare object
by qmp command:
chardev/char-io.c:91: io_watch_poll_finalize: Assertion `iwp->src == ((void *)0)' failed.

This is caused by failing to remove the right fd been watched while
call qemu_chr_fe_set_handlers();

Fix it by pass the worker_context parameter to qemu_chr_fe_set_handlers().

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Reviewed-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
zhanghailiang
8487ce45f8 char: remove the right fd been watched in qemu_chr_fe_set_handlers()
We can call qemu_chr_fe_set_handlers() to add/remove fd been watched
in 'context' which can be either default main context or other explicit
context. But the original logic is not correct, we didn't remove
the right fd because we call g_main_context_find_source_by_id(NULL, tag)
which always try to find the Gsource from default context.

Fix it by passing the right context to g_main_context_find_source_by_id().

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
zhanghailiang
dfd917a9c2 colo-compare: kick compare thread to exit after some cleanup in finalization
We should call g_main_loop_quit() to notify colo compare thread to
exit, Or it will run in g_main_loop_run() forever.

Besides, the finalizing process can't happen in context of colo thread,
it is reasonable to remove the 'if (qemu_thread_is_self(&s->thread))'
branch.

Before compare thead exits, some cleanup works need to be
done,  All unhandled packets need to be released and connection_track_table
needs to be freed, or there will be memory leak.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Reviewed-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
zhanghailiang
66d2a2423e colo-compare: use g_timeout_source_new() to process the stale packets
Instead of using qemu timer to process the stale packets,
We re-use the colo compare thread to process these packets
by creating a new timeout coroutine.

Besides, since we process all the same vNIC's net connection/packets
in one thread, it is safe to remove the timer_check_lock.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Dmitry Fleytman
002d394fd4 NetRxPkt: Remove code duplication in net_rx_pkt_pull_data()
This is a refactoring commit that does not change behavior.

Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Dmitry Fleytman
c5d083c561 NetRxPkt: Account buffer with ETH header in IOV length
In case of VLAN stripping ETH header is stored in a
separate chunk and length of IOV should take this into
account.

This patch fixes checksum validation for RX packets
with VLAN header.

Devices affected by this problem: e1000e and vmxnet3.

Cc: qemu-stable@nongnu.org
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Dmitry Fleytman
d5e772146d NetRxPkt: Do not try to pull more data than present
In case of VLAN stripping, ETH header put into a
separate buffer, therefore amont of data copied
from original IOV should be smaller.

Cc: qemu-stable@nongnu.org
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Dmitry Fleytman
df8bf7a7fe NetRxPkt: Fix memory corruption on VLAN header stripping
This patch fixed a problem that was introduced in commit eb700029.

When net_rx_pkt_attach_iovec() calls eth_strip_vlan()
this can result in pkt->ehdr_buf being overflowed, because
ehdr_buf is only sizeof(struct eth_header) bytes large
but eth_strip_vlan() can write
sizeof(struct eth_header) + sizeof(struct vlan_header)
bytes into it.

Devices affected by this problem: vmxnet3.

Cc: qemu-stable@nongnu.org
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Dmitry Fleytman
566342c312 eth: Extend vlan stripping functions
Make VLAN stripping functions return number of bytes
copied to given Ethernet header buffer.

This information should be used to re-compose
packet IOV after VLAN stripping.

Cc: qemu-stable@nongnu.org
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Fam Zheng
290e6e113b net: Remove useless local var pkt
This has been pointless since commit 605d52e62, which was a
search-and-replace, overlooked the redundancy.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2017-03-06 11:46:02 +08:00
Nikunj A Dadhania
182fe2cf19 target/ppc: use helper for excp handling
Use the helper routine float[32,64]_maddsub_update_excp() in VSX_MADD
macro.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-06 13:17:28 +11:00
Nikunj A Dadhania
3e5b26cf57 target/ppc: fmadd: add macro for updating flags
Adds FPU_MADDSUB_UPDATE macro, this will be used for other routines
having float32/16

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-06 13:17:28 +11:00
Nikunj A Dadhania
806c9d71ab target/ppc: fmadd check for excp independently
Current order of checking does not confirm with the spec
(ISA 3.0: MultiplyAddDP page-469). Change the order and make them
independent of each other.

For example: a = infinity, b = zero, c = SNaN, this should set both
VXIMZ and VXNAN

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-06 13:17:28 +11:00
Igor Mammedov
17b7c39e27 spapr: ensure that all threads within core are on the same NUMA node
Threads within a core shouldn't be on different
NUMA nodes, so if user has misconfgured command
line, fail QEMU at start up to force user fix it.

For now use the first thread on the core as source
of core's node-id. Later when cpu-numa refactoring
lands  it will be switched to core's node-id from
possible_cpus[].

This prevents the same problems as commit 20bb648d
"spapr: Fix default NUMA node allocation for threads",
but for the case of manually configured NUMA node
mappings, instead of just the default case.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-06 10:32:53 +11:00
Cédric Le Goater
7ea6e06717 ppc/xics: register reset handlers for the ICP and ICS objects
The recent changes on the XICS layer removed the XICSState object to
let the sPAPR machine handle the ICP and ICS directly. The reset of
these objects was previously handled by XICSState, which was a SysBus
device, and to keep the same behavior, the ICP and ICS were assigned
to SysbBus.

But that broke the 'info qtree' command in the monitor. 'qtree'
performs a loop on the children of a bus to print their properties and
SysBus devices are expected to be found under SysBus, which is not the
case anymore.

The fix for this problem is to register reset handlers for the ICP and
ICS objects and stop using SysBus for such devices.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-06 10:07:38 +11:00
Markus Armbruster
aa3a982e67 qapi: Improve qobject visitor documentation
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-29-git-send-email-armbru@redhat.com>
2017-03-05 09:14:20 +01:00
Markus Armbruster
1f41a645b6 qapi: Fix object input visit beyond end of list
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-28-git-send-email-armbru@redhat.com>
2017-03-05 09:14:20 +01:00
Markus Armbruster
a9416dc62c tests: Cover input visit beyond end of list
When you try to visit beyond the end of a list, the qobject input
visitor crashes, and the string visitor screws returns garbage.  The
generated list visits never go beyond the list end, but manual visits
could.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-27-git-send-email-armbru@redhat.com>
2017-03-05 09:14:20 +01:00
Markus Armbruster
a4a1c70dc7 qapi: Make input visitors detect unvisited list tails
Fix the design flaw demonstrated in the previous commit: new method
check_list() lets input visitors report that unvisited input remains
for a list, exactly like check_struct() lets them report that
unvisited input remains for a struct or union.

Implement the method for the qobject input visitor (straightforward),
and the string input visitor (less so, due to the magic list syntax
there).  The opts visitor's list magic is even more impenetrable, and
all I can do there today is a stub with a FIXME comment.  No worse
than before.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488544368-30622-26-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-05 09:14:20 +01:00
Markus Armbruster
86ca0dbe04 test-qobject-input-visitor: Cover missing nested struct member
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488544368-30622-25-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-05 09:14:20 +01:00
Markus Armbruster
9cb8ef3668 tests: Cover partial input visit of list
Demonstrates a design flaw: there is no way to for input visitors to
report that a list visit didn't visit the complete input list.  The
generated list visits always do, but manual visits needn't.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-24-git-send-email-armbru@redhat.com>
2017-03-05 09:14:20 +01:00
Markus Armbruster
3d089cea0d test-string-input-visitor: Improve list coverage
Lists with elements above INT64_MAX don't work (known bug).  Empty
lists don't work (weird).

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-23-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
0f721d168d test-string-input-visitor: Tear down existing test automatically
Call visitor_input_teardown() from visitor_input_test_init(), so you
don't have to call it from the actual tests.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-22-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
77c47de23f tests-qobject-input-strict: Merge into test-qobject-input-visitor
Much of test-qobject-input-strict.c duplicates
test-qobject-input-strict.c, but with less assertions on expected
output:

* test_validate_struct() duplicates test_visitor_in_struct()

* test_validate_struct_nested() duplicates
  test_visitor_in_struct_nested()

* test_validate_list() duplicates the first half of
  test_visitor_in_list()

* test_validate_union_native_list() duplicates
  test_visitor_in_native_list_int()

* test_validate_union_flat() duplicates test_visitor_in_union_flat()

* test_validate_alternate() duplicates the first part of
  test_visitor_in_alternate()

Merge the remaining test cases into test-qobject-input-visitor.c, and
drop the now redundant test-qobject-input-strict.c.

Test case "/visitor/input-strict/fail/list" isn't really about lists,
it's about a bad struct nested in a list.  Rename accordingly.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-21-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
048abb7b20 qapi: Drop unused non-strict qobject input visitor
The split between tests/test-qobject-input-visitor.c and
tests/test-qobject-input-strict.c now makes less sense than ever.  The
next commit will take care of that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-20-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
ec95f6148c test-qobject-input-visitor: Use strict visitor
The qobject input visitor comes in a strict and a non-strict variant.
This test is the non-strict variant's last user.  Turns out it relies
on non-strict only in test_visitor_in_null(), and just out of
laziness.  We don't actually test the non-strict behavior.

Clean up test_visitor_in_null(), and switch to the strict variant.
The next commit will drop the non-strict variant.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-19-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
05601ed2de qom: Make object_property_set_qobject()'s input visitor strict
Commit 240f64b made all qobject input visitors created outside tests
strict, except for the one in object_property_set_qobject().  That one
was left behind only because Eric couldn't spare the time to figure
out whether making it strict would break anything, with a TODO
comment.  Time to resolve it.

Strict makes a difference only for otherwise successful visits of QAPI
structs or unions.  Let's examine what the callers of
object_property_set_qobject() visit:

* object_property_set_str(), object_property_set_bool(),
  object_property_set_int() visit a QString, QBool, QInt,
  respectively.  Strictness can't matter.

* qmp_qom_set visits its @value argument.  Comes straight from QMP and
  can be anything ('any' in the QAPI schema).  Strictness matters when
  the property's set() method visits a struct or union QAPI type.

  No such methods exist, thus switching to strict can't break
  anything.

  If we acquire such methods in the future, we'll *want* the visitor
  to be strict, so that unexpected members get rejected as they should
  be.

Switch to strict.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-18-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
f332e830e3 qapi: Make string input and opts visitor require non-null input
The string input visitor tries to cope with null input.  Null input
isn't used anywhere, and isn't covered by tests.  Unsurprisingly, it
doesn't fully work: start_list() crashes because it passes the input
via parse_str() to strtoll() unchecked.

Make string_input_visitor_new() assert its argument isn't null, and
drop the code trying to deal with null input.

The opts visitor crashes when you try to actually visit something with
null input.  Make opts_visitor_new() assert its argument isn't null,
mostly for clarity.

qobject_input_visitor_new() already asserts its argument isn't null.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-17-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
a8aec6de2a qapi: Drop string input visitor method optional()
visit_optional() is to be called only between visit_start_struct() and
visit_end_struct().  Visitors that don't support struct visits,
i.e. don't implement start_struct(), end_struct(), have no use for it.
Clarify documentation.

The string input visitor doesn't support struct visits.  Its
parse_optional() is therefore useless.  Drop it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-16-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
a9fc37f6bc qapi: Improve qobject input visitor error reporting
Error messages refer to nodes of the QObject being visited by name.
Trouble is the names are sometimes less than helpful:

* The name of the root QObject is whatever @name argument got passed
  to the visitor, except NULL gets mapped to "null".  We commonly pass
  NULL.  Not good.

  Avoiding errors "at the root" mitigates.  For instance,
  visit_start_struct() can only fail when the visited object is not a
  dictionary, and we commonly ensure it is beforehand.

* The name of a QDict's member is the member key.  Good enough only
  when this happens to be unique.

* The name of a QList's member is "null".  Not good.

Improve error messages by referring to nodes by path instead, as
follows:

* The path of the root QObject is whatever @name argument got passed
  to the visitor, except NULL gets mapped to "<anonymous>".

* The path of a root QDict's member is the member key.

* The path of a root QList's member is "[%u]", where %u is the list
  index, starting at zero.

* The path of a non-root QDict's member is the path of the QDict
  concatenated with "." and the member key.

* The path of a non-root QList's member is the path of the QList
  concatenated with "[%u]", where %u is the list index.

For example, the incorrect QMP command

    { "execute": "blockdev-add", "arguments": { "node-name": "foo", "driver": "raw", "file": {"driver": "file" } } }

now fails with

    {"error": {"class": "GenericError", "desc": "Parameter 'file.filename' is missing"}}

instead of

    {"error": {"class": "GenericError", "desc": "Parameter 'filename' is missing"}}

and

    { "execute": "input-send-event", "arguments": { "device": "bar", "events": [ [] ] } }

now fails with

    {"error": {"class": "GenericError", "desc": "Invalid parameter type for 'events[0]', expected: object"}}

instead of

    {"error": {"class": "GenericError", "desc": "Invalid parameter type for 'null', expected: QDict"}}

Aside: calling the thing "parameter" is suboptimal for QMP, because
the root object is "arguments" there.

The qobject output visitor doesn't have this problem because it should
not fail.  Same for dealloc and clone visitors.

The string visitors don't have this problem because they visit just
one value, whose name needs to be passed to the visitor as @name.  The
string output visitor shouldn't fail anyway.

The options visitor uses QemuOpts names.  Their name space is flat, so
the use of QDict member keys as names is fine.  NULL names used with
roots and lists could conceivably result in bad error messages.  Left
for another day.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-15-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
58561c2766 qapi: Make QObject input visitor set *list reliably
qobject_input_start_struct() sets *list, except when it fails because
qobject_input_get_object() fails, i.e. the input object doesn't exist.

All the other input visitor start_struct(), start_list(),
start_alternate() always set *obj / *list.

Change qobject_input_start_struct() to match.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-14-git-send-email-armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-05 09:14:19 +01:00
Markus Armbruster
b8874fbfd3 qapi: Clean up after commit 3d344c2
Drop unused QIV_STACK_SIZE and unused qobject_input_start_struct()
parameter errp.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-13-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
910f738b85 qapi: Improve a QObject input visitor error message
The QObject input visitor has three error message formats:

* Parameter '%s' is missing
* "Invalid parameter type for '%s', expected: %s"
* "QMP input object member '%s' is unexpected"

The '%s' are member names (or "null", but I'll fix that later).

The last error message calls the thing "QMP input object member"
instead of "parameter".  Misleading when the visitor is used on
QObjects that don't come from QMP.  Change it to "Parameter '%s' is
unexpected".

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-12-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
99fb0c53c0 qmp: Eliminate silly QERR_QMP_* macros
The QERR_ macros are leftovers from the days of "rich" error objects.

QERR_QMP_BAD_INPUT_OBJECT, QERR_QMP_BAD_INPUT_OBJECT_MEMBER,
QERR_QMP_EXTRA_MEMBER are used in just one place now, except for one
use that has crept into qobject-input-visitor.c.

Drop these macros, to make the (bad) error messages more visible.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-10-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
104fc30279 qmp: Drop duplicated QMP command object checks
qmp_check_input_obj() duplicates qmp_dispatch_check_obj(), except the
latter screws up an error message.  handle_qmp_command() runs first
the former, then the latter via qmp_dispatch(), masking the screwup.

qemu-ga also masks the screwup, because it also duplicates checks,
just differently.

qmp_check_input_obj() exists because handle_qmp_command() needs to
examine the command before dispatching it.  The previous commit got
rid of this need, except for a tracepoint, and a bit of "id" code that
relies on qdict not being null.

Fix up the error message in qmp_dispatch_check_obj(), drop
qmp_check_input_obj() and the tracepoint.  Protect the "id" code with
a conditional.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-9-git-send-email-armbru@redhat.com>
2017-03-05 09:14:19 +01:00
Markus Armbruster
635db18f68 qmp: Clean up how we enforce capability negotiation
To enforce capability negotiation before normal operation,
handle_qmp_command() inspects every command before it's handed off to
qmp_dispatch().  This is a bit of a layering violation, and results in
duplicated code.

Before capability negotiation (!cur_mon->in_command_mode), we fail
commands other than "qmp_capabilities".  This is what enforces
capability negotiation.

Afterwards, we fail command "qmp_capabilities".

Clean this up as follows.

The obvious place to fail a command is the command itself, so move the
"afterwards" check to qmp_qmp_capabilities().

We do the "before" check in every other command, but that would be
bothersome.  Instead, start with an alternate list of commands that
contains only "qmp_capabilities".  Switch to the full list in
qmp_qmp_capabilities().

Additionally, replace the generic human-readable error message for
CommandNotFound by one that reminds the user to run qmp_capabilities.
Without that, we'd regress commit 2d5a834.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488544368-30622-8-git-send-email-armbru@redhat.com>
[Mirco-optimization squashed in, commit message typo fixed]
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-05 09:14:11 +01:00
Markus Armbruster
9b0c9a6349 qapi-introspect: Mangle --prefix argument properly for C
qapi-introspect.py --prefix hasn't been used so far, but fix it anyway.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488544368-30622-7-git-send-email-armbru@redhat.com>
[Commit message improved]
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-05 09:12:29 +01:00
Markus Armbruster
1527badb95 qapi: Support multiple command registries per program
The command registry encapsulates a single command list.  Give the
functions using it a parameter instead.  Define suitable command lists
in monitor, guest agent and test-qmp-commands.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1488544368-30622-6-git-send-email-armbru@redhat.com>
[Debugging turds buried]
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-05 09:12:25 +01:00
Markus Armbruster
0587568780 qmp: Dumb down how we run QMP command registration
The way we get QMP commands registered is high tech:

* qapi-commands.py generates qmp_init_marshal() that does the actual work

* it also generates the magic to register it as a MODULE_INIT_QAPI
  function, so it runs when someone calls
  module_call_init(MODULE_INIT_QAPI)

* main() calls module_call_init()

QEMU needs to register a few non-qapified commands.  Same high tech
works: monitor.c has its own qmp_init_marshal() along with the magic
to make it run in module_call_init(MODULE_INIT_QAPI).

QEMU also needs to unregister commands that are not wanted in this
build's configuration (commit 5032a16).  Simple enough:
qmp_unregister_commands_hack().  The difficulty is to make it run
after the generated qmp_init_marshal().  We can't simply run it in
monitor.c's qmp_init_marshal(), because the order in which the
registered functions run is indeterminate.  So qmp_init_marshal()
registers qmp_unregister_commands_hack() separately.  Since
registering *appends* to the list of registered functions, this will
make it run after all the functions that have been registered already.

I suspect it takes a long and expensive computer science education to
not find this silly.

Dumb it down as follows:

* Drop MODULE_INIT_QAPI entirely

* Give the generated qmp_init_marshal() external linkage.

* Call it instead of module_call_init(MODULE_INIT_QAPI)

* Except in QEMU proper, call new monitor_init_qmp_commands() that in
  turn calls the generated qmp_init_marshal(), registers the
  additional commands and unregisters the unwanted ones.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-5-git-send-email-armbru@redhat.com>
2017-03-05 09:02:10 +01:00
Markus Armbruster
f66e7ac88c qmp-test: New, covering basic QMP protocol
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-4-git-send-email-armbru@redhat.com>
2017-03-05 09:02:10 +01:00
Markus Armbruster
13420ef837 libqtest: Work around a "QMP wants a newline" bug
The next commit is going to add a test that calls qmp("null").
Curiously, this hangs.  Here's why.

qmp_fd_sendv() doesn't send newlines.  Not even when @fmt contains
some.  At first glance, the QMP parser seems to be fine with that.
However, it turns out that it fails to react to input until it sees
either a newline, an object or an array.  To reproduce, feed to a QMP
monitor like this:

    $ echo -n 'null' | socat UNIX:/work/armbru/images/test-qmp STDIO
    {"QMP": {"version": {"qemu": {"micro": 50, "minor": 8, "major": 2}, "package": " (v2.8.0-1195-gf84141e-dirty)"}, "capabilities": []}}

No output after the greeting.

Add a newline:

    $ echo 'null' | socat UNIX:/work/armbru/images/test-qmp STDIO
    {"QMP": {"version": {"qemu": {"micro": 50, "minor": 8, "major": 2}, "package": " (v2.8.0-1195-gf84141e-dirty)"}, "capabilities": []}}
    {"error": {"class": "GenericError", "desc": "Expected 'object' in QMP input"}}

Correct output for input 'null'.

Add an object instead:

    $ echo -n 'null { "execute": "qmp_capabilities" }' | socat UNIX:qmp-socket STDIO
    {"QMP": {"version": {"qemu": {"micro": 50, "minor": 8, "major": 2}, "package": " (v2.8.0-1195-gf84141e-dirty)"}, "capabilities": []}}
    {"error": {"class": "GenericError", "desc": "Expected 'object' in QMP input"}}
    {"return": {}}

Also correct output.

Work around this QMP bug by having qmp_fd_sendv() append a newline.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-3-git-send-email-armbru@redhat.com>
2017-03-05 09:02:10 +01:00
Markus Armbruster
74d8c9d99d qga: Fix crash on non-dictionary QMP argument
The value of key 'arguments' must be a JSON object.  qemu-ga neglects
to check, and crashes.  To reproduce, send

    { 'execute': 'guest-sync', 'arguments': [] }

to qemu-ga.

do_qmp_dispatch() uses qdict_get_qdict() to get the arguments.  When
not a JSON object, this gets a null pointer, which flows through the
generated marshalling function to qobject_input_visitor_new(), where
it fails the assertion.  qmp_dispatch_check_obj() needs to catch this
error.

QEMU isn't affected, because it runs qmp_check_input_obj() first,
which basically duplicates qmp_dispatch_check_obj()'s checks, plus the
missing one.

Fix by copying the missing one from qmp_check_input_obj() to
qmp_dispatch_check_obj().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Cc: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1488544368-30622-2-git-send-email-armbru@redhat.com>
2017-03-05 09:02:10 +01:00
Peter Maydell
17783ac828 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170303' into staging
ppc patch queuye for 2017-03-03

This will probably be my last pull request before the hard freeze.  It
has some new work, but that has all been posted in draft before the
soft freeze, so I think it's reasonable to include in qemu-2.9.

This batch has:
    * A substantial amount of POWER9 work
        * Implements the legacy (hash) MMU for POWER9
	* Some more preliminaries for implementing the POWER9 radix
          MMU
	* POWER9 has_work
	* Basic POWER9 compatibility mode handling
	* Removal of some premature tests
    * Some cleanups and fixes to the existing MMU code to make the
      POWER9 work simpler
    * A bugfix for TCG multiply adds on power
    * Allow pseries guests to access PCIe extended config space

This also includes a code-motion not strictly in ppc code - moving
getrampagesize() from ppc code to exec.c.  This will make some future
VFIO improvements easier, Paolo said it was ok to merge via my tree.

# gpg: Signature made Fri 03 Mar 2017 03:20:36 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170303:
  target/ppc: rewrite f[n]m[add,sub] using float64_muladd
  spapr: Small cleanup of PPC MMU enums
  spapr_pci: Advertise access to PCIe extended config space
  target/ppc: Rework hash mmu page fault code and add defines for clarity
  target/ppc: Move no-execute and guarded page checking into new function
  target/ppc: Add execute permission checking to access authority check
  target/ppc: Add Instruction Authority Mask Register Check
  hw/ppc/spapr: Add POWER9 to pseries cpu models
  target/ppc/POWER9: Add cpu_has_work function for POWER9
  target/ppc/POWER9: Add POWER9 pa-features definition
  target/ppc/POWER9: Add POWER9 mmu fault handler
  target/ppc: Don't gen an SDR1 on POWER9 and rework register creation
  target/ppc: Add patb_entry to sPAPRMachineState
  target/ppc/POWER9: Add POWERPC_MMU_V3 bit
  powernv: Don't test POWER9 CPU yet
  exec, kvm, target-ppc: Move getrampagesize() to common code
  target/ppc: Add POWER9/ISAv3.00 to compat_table

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-04 16:31:14 +00:00
Paolo Bonzini
eeb61d4f82 ppc: avoid typedef redefinitions
These cause compilation failures on CentOS 6 or other operating
systems with older GCCs.

Cc: David Gibson <dgibson@redhat.com>
Cc: qemu-ppc@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1488558530-21016-3-git-send-email-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-04 15:14:34 +00:00
Paolo Bonzini
4ae4b609ee nios2: avoid anonymous unions in designated initializers.
These cause compilation failures on CentOS 6 or other operating
systems with older GCCs.

Cc: Richard Henderson <rth@twiddle.net>
Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-04 14:05:48 +00:00
Paolo Bonzini
eff235eb2b hppa: avoid anonymous unions in designated initializers.
These cause compilation failures on CentOS 6 or other operating
systems with older GCCs.

Cc: Richard Henderson <rth@twiddle.net>
Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 1488558530-21016-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-04 12:52:01 +00:00
Peter Maydell
5febe7671f Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* kernel header update (requested by David and Vijay)
* GuestPanicInformation fixups (Anton)
* record/replay icount fixes (Pavel)
* cpu-exec cleanup, unification of icount_decr with tcg_exit_req (me)
* KVM_CAP_IMMEDIATE_EXIT support (me)
* vmxcap update (me)
* iscsi locking fix (me)
* VFIO ram device fix (Yongji)
* scsi-hd vs. default CD-ROM (Hervé)
* SMI migration fix (Dave)
* spice-char segfault (Li Qiang)
* improved "info mtree -f" (me)

# gpg: Signature made Fri 03 Mar 2017 15:43:04 GMT
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (21 commits)
  iscsi: fix missing unlock
  memory: show region offset and ROM/RAM type in "info mtree -f"
  x86: Work around SMI migration breakages
  spice-char: fix segfault in char_spice_finalize
  vl: disable default cdrom when using explicitely scsi-hd
  memory: Introduce DEVICE_HOST_ENDIAN for ram device
  qmp-events: fix GUEST_PANICKED description formatting
  qapi: flatten GuestPanicInformation union
  vmxcap: update for September 2016 SDM
  vmxcap: port to Python 3
  KVM: use KVM_CAP_IMMEDIATE_EXIT
  kvm: use atomic_read/atomic_set to access cpu->exit_request
  KVM: move SIG_IPI handling to kvm-all.c
  KVM: do not use sigtimedwait to catch SIGBUS
  KVM: remove kvm_arch_on_sigbus
  cpus: reorganize signal handling code
  KVM: x86: cleanup SIGBUS handlers
  cpus: remove ugly cast on sigbus_handler
  cpu-exec: remove unnecessary check of cpu->exit_request
  replay: check icount in cpu exec loop
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-03 16:41:09 +00:00
Paolo Bonzini
f6eb0b319e iscsi: fix missing unlock
Reported by Coverity.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:41:20 +01:00
Paolo Bonzini
377a07aa0d memory: show region offset and ROM/RAM type in "info mtree -f"
"info mtree -f" output is currently hard to use for large RAM regions, because
there is no hint as to what part of the region is being mapped.  Add the offset
if it is nonzero.

Secondly, FlatView has a readonly field, that can override the MemoryRegion
in the presence of aliases.  Take it into account.

Together, with this patch this:

address-space (flat view): KVM-SMRAM
  0000000000000000-00000000000bffff (prio 0, ram): pc.ram
  00000000000c0000-00000000000c9fff (prio 0, ram): pc.ram
  00000000000ca000-00000000000ccfff (prio 0, ram): pc.ram
  00000000000cd000-00000000000ebfff (prio 0, ram): pc.ram
  00000000000ec000-00000000000effff (prio 0, ram): pc.ram
  00000000000f0000-00000000000fffff (prio 0, ram): pc.ram
  0000000000100000-00000000bfffffff (prio 0, ram): pc.ram
  00000000fd000000-00000000fdffffff (prio 1, ram): vga.vram
  00000000febc0000-00000000febdffff (prio 1, i/o): e1000-mmio
  00000000febf0400-00000000febf041f (prio 0, i/o): vga ioports remapped
  00000000febf0500-00000000febf0515 (prio 0, i/o): bochs dispi interface
  00000000febf0600-00000000febf0607 (prio 0, i/o): qemu extended regs
  00000000fec00000-00000000fec00fff (prio 0, i/o): kvm-ioapic
  00000000fed00000-00000000fed003ff (prio 0, i/o): hpet
  00000000fee00000-00000000feefffff (prio 4096, i/o): kvm-apic-msi
  00000000fffc0000-00000000ffffffff (prio 0, rom): pc.bios
  0000000100000000-000000013fffffff (prio 0, ram): pc.ram

becomes this:

address-space (flat view): KVM-SMRAM
  0000000000000000-00000000000bffff (prio 0, ram): pc.ram
  00000000000c0000-00000000000c9fff (prio 0, rom): pc.ram @00000000000c0000
  00000000000ca000-00000000000ccfff (prio 0, ram): pc.ram @00000000000ca000
  00000000000cd000-00000000000ebfff (prio 0, rom): pc.ram @00000000000cd000
  00000000000ec000-00000000000effff (prio 0, ram): pc.ram @00000000000ec000
  00000000000f0000-00000000000fffff (prio 0, rom): pc.ram @00000000000f0000
  0000000000100000-00000000bfffffff (prio 0, ram): pc.ram @0000000000100000
  00000000fd000000-00000000fdffffff (prio 1, ram): vga.vram
  00000000febc0000-00000000febdffff (prio 1, i/o): e1000-mmio
  00000000febf0400-00000000febf041f (prio 0, i/o): vga ioports remapped
  00000000febf0500-00000000febf0515 (prio 0, i/o): bochs dispi interface
  00000000febf0600-00000000febf0607 (prio 0, i/o): qemu extended regs
  00000000fec00000-00000000fec00fff (prio 0, i/o): kvm-ioapic
  00000000fed00000-00000000fed003ff (prio 0, i/o): hpet
  00000000fee00000-00000000feefffff (prio 4096, i/o): kvm-apic-msi
  00000000fffc0000-00000000ffffffff (prio 0, rom): pc.bios
  0000000100000000-000000013fffffff (prio 0, ram): pc.ram @00000000c0000000

This should make it easier to understand what's going on.

Cc: Peter Xu <peterx@redhat.com>
Cc: "William Tambe" <tambewilliam@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:03 +01:00
Dr. David Alan Gilbert
fc3a1fd74f x86: Work around SMI migration breakages
Migration from a 2.3.0 qemu results in a reboot on the receiving QEMU
due to a disagreement about SM (System management) interrupts.

2.3.0 didn't have much SMI support, but it did set CPU_INTERRUPT_SMI
and this gets into the migration stream, but on 2.3.0 it
never got delivered.

~2.4.0 SMI interrupt support was added but was broken - so
that when a 2.3.0 stream was received it cleared the CPU_INTERRUPT_SMI
but never actually caused an interrupt.

The SMI delivery was recently fixed by 68c6efe07a, but the
effect now is that an incoming 2.3.0 stream takes the interrupt it
had flagged but it's bios can't actually handle it(I think
partly due to the original interrupt not being taken during boot?).
The consequence is a triple(?) fault and a reboot.

Tested from:
  2.3.1 -M 2.3.0
  2.7.0 -M 2.3.0
  2.8.0 -M 2.3.0
  2.8.0 -M 2.8.0

This corresponds to RH bugzilla entry 1420679.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170223133441.16010-1-dgilbert@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:03 +01:00
Li Qiang
f20e6f8cd4 spice-char: fix segfault in char_spice_finalize
In 'qemu_chr_open_spice_vmc' if the 'psubtype' is NULL, it will
call 'char_spice_finalize'. But as the SpiceChardev is not inserted
in the 'spice_chars' list, the 'QLIST_REMOVE' will cause a segfault.
Add a detect to avoid it.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-Id: <1487665107-88004-1-git-send-email-liqiang6-s@360.cn>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Li Qiang <liq3ea@gmail.com>
2017-03-03 16:40:03 +01:00
Hervé Poussineau
f6f99b4808 vl: disable default cdrom when using explicitely scsi-hd
In commit af6bf1328e (May 2011),
ide-hd, ide-cd and scsi-cd have been added to disable default cdrom,
"or else you can't put one on secondary master without -nodefaults".

Make it the same for scsi-hd, so you can put one on scsi-id 2 without
using -nodefaults.
scsi-hd has probably been forgotten, as it has been added in the
preceding commit (b443ae6713).

Affected users are the ones using a machine with SCSI devices and start QEMU
with -device scsi-hd but without -device scsi-cd or -cdrom
In that case, the default cdrom device will disappear instead of being empty.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Message-Id: <1487623279-29930-1-git-send-email-hpoussin@reactos.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:03 +01:00
Yongji Xie
c99a29e702 memory: Introduce DEVICE_HOST_ENDIAN for ram device
At the moment ram device's memory regions are DEVICE_NATIVE_ENDIAN. It's
incorrect. This memory region is backed by a MMIO area in host, so the
uint64_t data that MemoryRegionOps read from/write to this area should be
host-endian rather than target-endian. Hence, current code does not work
when target and host endianness are different which is the most common case
on PPC64. To fix it, this introduces DEVICE_HOST_ENDIAN for the ram device.

This has been tested on PPC64 BE/LE host/guest in all possible combinations
including TCG.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
Message-Id: <1488171164-28319-1-git-send-email-xyjxie@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:03 +01:00
Anton Nefedov
11953be792 qmp-events: fix GUEST_PANICKED description formatting
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Message-Id: <1487614915-18710-4-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:03 +01:00
Anton Nefedov
e8ed97a647 qapi: flatten GuestPanicInformation union
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Message-Id: <1487614915-18710-3-git-send-email-den@openvz.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:03 +01:00
Paolo Bonzini
025533f6ee vmxcap: update for September 2016 SDM
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
c3e31eaa21 vmxcap: port to Python 3
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
cf0f7cf903 KVM: use KVM_CAP_IMMEDIATE_EXIT
The purpose of the KVM_SET_SIGNAL_MASK API is to let userspace "kick"
a VCPU out of KVM_RUN through a POSIX signal.  A signal is attached
to a dummy signal handler; by blocking the signal outside KVM_RUN and
unblocking it inside, this possible race is closed:

          VCPU thread                     service thread
   --------------------------------------------------------------
        check flag
                                          set flag
                                          raise signal
        (signal handler does nothing)
        KVM_RUN

However, one issue with KVM_SET_SIGNAL_MASK is that it has to take
tsk->sighand->siglock on every KVM_RUN.  This lock is often on a
remote NUMA node, because it is on the node of a thread's creator.
Taking this lock can be very expensive if there are many userspace
exits (as is the case for SMP Windows VMs without Hyper-V reference
time counter).

KVM_CAP_IMMEDIATE_EXIT provides an alternative, where the flag is
placed directly in kvm_run so that KVM can see it:

          VCPU thread                     service thread
   --------------------------------------------------------------
                                          raise signal
        signal handler
          set run->immediate_exit
        KVM_RUN
          check run->immediate_exit

The previous patches changed QEMU so that the only blocked signal is
SIG_IPI, so we can now stop using KVM_SET_SIGNAL_MASK and sigtimedwait
if KVM_CAP_IMMEDIATE_EXIT is available.

On a 14-VCPU guest, an "inl" operation goes down from 30k to 6k on
an unlocked (no BQL) MemoryRegion, or from 30k to 15k if the BQL
is involved.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
c5c6679d37 kvm: use atomic_read/atomic_set to access cpu->exit_request
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
18268b6016 KVM: move SIG_IPI handling to kvm-all.c
This lets us remove a bunch of CONFIG_LINUX defines.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
2ae41db262 KVM: do not use sigtimedwait to catch SIGBUS
Call kvm_on_sigbus_vcpu asynchronously from the VCPU thread.
Information for the SIGBUS can be stored in thread-local variables
and processed later in kvm_cpu_exec.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
4d39892cca KVM: remove kvm_arch_on_sigbus
Build it on kvm_arch_on_sigbus_vcpu instead.  They do the same
for "action optional" SIGBUSes, and the main thread should never get
"action required" SIGBUSes because it blocks the signal.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
a16fc07ebd cpus: reorganize signal handling code
Move the KVM "eat signals" code under CONFIG_LINUX, in preparation
for moving it to kvm-all.c; reraise non-MCE SIGBUS immediately,
without passing it to KVM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
20e0ff59a9 KVM: x86: cleanup SIGBUS handlers
This patch should have no semantic change.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
d98d407234 cpus: remove ugly cast on sigbus_handler
The cast is there because sigbus_handler is invoked via sigfd_handler.
But it feels just wrong to use struct qemu_signalfd_siginfo in the
prototype of a function that is passed to sigaction.

Instead, do a simple-minded conversion of qemu_signalfd_siginfo to
siginfo_t.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:40:02 +01:00
Paolo Bonzini
30f3dda24b Merge branch 'icount-update' into HEAD
Merge the original development branch due to breakage caused by the
MTTCG merge.

Conflicts:
	cpu-exec.c
	translate-common.c

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-03 16:39:18 +01:00
Peter Maydell
5b10b94bd5 Merge remote-tracking branch 'remotes/ehabkost/tags/numa-pull-request' into staging
NUMA documentation update

# gpg: Signature made Fri 03 Mar 2017 13:11:25 GMT
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/numa-pull-request:
  qemu-options: Rewrite -numa documentation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-03 14:59:45 +00:00
Peter Maydell
9a17d32721 Merge remote-tracking branch 'remotes/dgibson/tags/submodule-update-20170303' into staging
submodule updates (SLOF & dtc) 2017-03-03

This set of patches updates the SLOF and dtc submodules for qemu-2.9.

The SLOF update could have gone in my ppc pull request earlier today,
but I forgot it.  It should be safe to apply in either order with that
set though.

The dtc (and libfdt) update brings us up to dtc 1.4.3 which includes
some things that will be useful in future.

# gpg: Signature made Fri 03 Mar 2017 06:29:31 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/submodule-update-20170303:
  Update dtc submodule to v1.4.3
  pseries: Update SLOF firmware image

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-03 14:04:27 +00:00
Eduardo Habkost
4b9a5dd762 qemu-options: Rewrite -numa documentation
Rewrite the -numa documentation to clarify what exactly it does.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170123180632.28942-3-ehabkost@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-03-03 10:08:03 -03:00
Peter Maydell
1ec2dca691 Merge remote-tracking branch 'remotes/berrange/tags/pull-qio-2017-02-27-2' into staging
Merge qio 2017/02/27 v2

# gpg: Signature made Thu 02 Mar 2017 16:09:27 GMT
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qio-2017-02-27-2:
  io: fully parse & validate HTTP headers for websocket protocol handshake
  tests: fix leaks in test-io-channel-command
  io: fix decoding when multiple websockets frames arrive at once

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-03 12:53:33 +00:00
Peter Maydell
508e038a5d dtc: Revert unintentional submodule downgrade from commit 077dd74239
Commit 077dd74239 inadvertently downgraded the 'dtc' submodule,
undoing the increment added in commit 6e85fce022. Revert this,
returning the submodule state to where we should be.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-03 12:48:42 +00:00
Peter Maydell
9a81b792cc Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pc: fixes, features

virtio support for region caches broke a bunch of stuff - fixing most of
it though it's not ideal.  Still pondering the right way to fix it.
New: VM gen ID and hotplug for PXB.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 02 Mar 2017 06:19:17 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  hw/pxb-pcie: fix PCI Express hotplug support
  tests/acpi: update DSDT after last patch
  acpi: simplify _OSC
  virtio: unbreak virtio-pci with IOMMU after caching ring translations
  virtio: add missing region cache init in virtio_load()
  virtio: invalidate memory in vring_set_avail_event()
  virtio: guard vring access when setting notification
  virtio: check for vring setup in virtio_queue_empty
  MAINTAINERS: Add VM Generation ID entries
  tests: Move reusable ACPI code into a utility file
  qmp/hmp: add query-vm-generation-id and 'info vm-generation-id' commands
  ACPI: Add Virtual Machine Generation ID support
  ACPI: Add vmgenid blob storage to the build tables
  docs: VM Generation ID device description
  linker-loader: Add new 'write pointer' command

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-03 10:09:03 +00:00
David Gibson
125a9cb8e3 Update dtc submodule to v1.4.3
Since the last submodule update (which was v1.4.2) dtc and libfdt have
gained some features which would be useful in qemu.  There's now a v1.4.3
upstream release, so update our submodule to point to it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 17:25:32 +11:00
Alexey Kardashevskiy
a438fa121f pseries: Update SLOF firmware image
Various fixes in this update, the full list is:

  > qemu-bootlist: Take the "-boot strict=off" setting properly into account
  > virtio-scsi: initialize vring avail queue buffers
  > virtio: Remove global variables in block and 9p driver
  > Remove superfluous checkpoints in tree.fs
  > Provide "write" function in the disk-label package
  > virtio: Implement block write support
  > scsi: Add SCSI block write support
  > deblocker: Add a 'write' function
  > virtio-scsi: Fix descriptor order for SCSI WRITE commands
  > board-qemu: Add a possibility to use hvterm input instead of USB keyboard
  > Do not try to use virtio-gpu in VGA mode
  > virtio: Fix stack comment of virtio-blk-read
  > envvar: Do not read default values for /options from the NVRAM anymore
  > envvar: Set properties in /options during "(set-defaults)"

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 17:25:32 +11:00
Nikunj A Dadhania
992d7e976c target/ppc: rewrite f[n]m[add,sub] using float64_muladd
Use the softfloat api for fused multiply-add.
Introduce routine to set the FPSCR flags VXNAN, VXIMZ nad VMISI.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:38:33 +11:00
Sam Bobroff
ec975e839c spapr: Small cleanup of PPC MMU enums
The PPC MMU types are sometimes treated as if they were a bit field
and sometime as if they were an enum which causes maintenance
problems: flipping bits in the MMU type (which is done on both the 1TB
segment and 64K segment bits) currently produces new MMU type
values that are not handled in every "switch" on it, sometimes causing
an abort().

This patch provides some macros that can be used to filter out the
"bit field-like" bits so that the remainder of the value can be
switched on, like an enum. This allows removal of all of the
"degraded" types from the list and should ease maintenance.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
David Gibson
bb99864528 spapr_pci: Advertise access to PCIe extended config space
The (paravirtual) PCI host bridge on the 'pseries' machine in most
regards acts like a regular PCI bus, rather than a PCIe bus.  Despite
this, though, it does allow access to the PCIe extended config space.

We already implemented the RTAS methods to allow this access.. but
forgot to put the markers into the device tree so that guest's know it
is there.  This adds them in.

With this, a pseries guest is able to view extended config space on
(for example an e1000e device.  This should be enough to allow guests
to use at least some PCIe devices.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
da82c73a95 target/ppc: Rework hash mmu page fault code and add defines for clarity
The hash mmu page fault handling code is responsible for generating ISIs
and DSIs when access permissions cause an access to fail. Part of this
involves setting the srr1 or dsisr registers to indicate what causes the
access to fail. Add defines for the bit fields of these registers and
rework the code to use these new defines in order to improve readability
and code clarity.

While we're here, update what is logged when an access fails to include
information as to what caused to access to fail for debug purposes.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
[dwg: Moved constants to cpu.h since they're not MMUv3 specific]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
07a68f9907 target/ppc: Move no-execute and guarded page checking into new function
A pte entry has bit fields which can be used to make a page no-execute or
guarded, if either of these bits are set then an instruction access to this
page will fail. Currently these bits are checked with the pp_prot function
however the ISA specifies that the access authority controlled by the
key-pp value pair should only be checked on an instruction access after
the no-execute and guard bits have already been verified to permit the
access.

Move the no-execute and guard bit checking into a new separate function.
Note that we can remove the check for the no-execute bit in the slb entry
since this check was already performed above when we obtained the slb
entry.

In the event that the no-execute or guard bits are set, an ISI should be
generated with the SRR1_NOEXEC_GUARD (0x10000000) bit set in srr1. Add a
define for this for clarity.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
[dwg: Move constants to cpu.h since they're not MMUv3 specific]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
347a5c73ba target/ppc: Add execute permission checking to access authority check
Basic storage protection defines various access authority permissions
based on a slb storage key and pte pp value pair. This access authority
defines read, write and execute permissions however currently we only
use this to control read and write permissions and ignore the execute
control.

Fix the code to allow execute permissions based on the key-pp value pair.
Execute is allowed under the same conditions which enable reads.
(i.e. read permission -> execute permission)

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
a6152b52bc target/ppc: Add Instruction Authority Mask Register Check
The instruction authority mask register (IAMR) can be used to restrict
permissions for instruction fetch accesses on a per key basis for each
of 32 different key values. Access permissions are derived based on the
specific key value stored in the relevant page table entry.

The IAMR was introduced in, and is present in processors since, POWER8
(ISA v2.07). Thus introduce a function to check access permissions based
on the pte key value and the contents of the IAMR when handling a page
fault to ensure sufficient access permissions for an instruction fetch.

A hash pte contains a key value in bits 2:3|52:54 of the second double word
of the pte, this key value gives an index into the IAMR which contains 32
2-bit access masks. If the least significant bit of the 2-bit access mask
corresponding to the given key value is set (IAMR[key] & 0x1 == 0x1) then
the instruction fetch is not permitted and an ISI is generated accordingly.
While we're here, add defines for the srr1 bits to be set for the ISI for
clarity.

e.g.

pte:
dw0 [XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX]
dw1 [XX01XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010XXXXXXXXX]
       ^^                                                ^^^
key = 01010 (0x0a)

IAMR: [XXXXXXXXXXXXXXXXXXXX01XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX]
                           ^^
Access mask = 0b01

Test access mask: 0b01 & 0x1 == 0x1

Least significant bit of the access mask is set, thus the instruction fetch
is not permitted. We should generate an instruction storage interrupt (ISI)
with bit 42 of SRR1 set to indicate access precluded by virtual page class
key protection.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
[dwg: Move new constants to cpu.h, since they're not MMUv3 specific]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
24d8e5655f hw/ppc/spapr: Add POWER9 to pseries cpu models
Add POWER9 cpu to list of spapr core models which allows it to be specified
as the cpu model for a pseries guest (e.g. -machine pseries -cpu POWER9).

This now allows a POWER9 cpu to boot to userspace in tcg emulation for a
pseries machine with a legacy kernel.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
6f46dcb3e5 target/ppc/POWER9: Add cpu_has_work function for POWER9
The cpu has work function is used to mask interrupts used to determine
if there is work for the cpu based on the LPCR. Add a function to do this
for POWER9 and add it to the POWER9 cpu definition. This is similar to that
for POWER8 except using the LPCR bits as defined for POWER9.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
4975c098c9 target/ppc/POWER9: Add POWER9 pa-features definition
Add a pa-features definition which includes all of the new fields which
have been added, note we don't claim support for any of these new features
at this stage.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
b2899495e3 target/ppc/POWER9: Add POWER9 mmu fault handler
Add a new mmu fault handler for the POWER9 cpu and add it as the handler
for the POWER9 cpu definition.

This handler checks if the guest is radix or hash based on the value in the
partition table entry and calls the correct fault handler accordingly.

The hash fault handling code has also been updated to check if the
partition is using segment tables.

Currently only legacy hash (no segment tables) is supported.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
4f4f28ffc1 target/ppc: Don't gen an SDR1 on POWER9 and rework register creation
POWER9 doesn't have a storage description register 1 (SDR1) which is used
to store the base and size of the hash table. Thus we don't need to
generate this register on the POWER9 cpu model. While we're here, the
register generation code for 970, POWER5+, POWER<7/8/9> in general is a
mess where we call a generic function from a model specific function which
then attempts to call model specific functions, so rework this for
readability.

We update ppc_cpu_dump_state so that "info registers" will only display
the value of sdr1 if the register has been generated.

As mentioned above the register generation for the pcc->init_proc
function for 970, POWER5+, POWER7, POWER8 and POWER9 has been reworked
for improved clarity. Instead of calling init_proc_book3s_64 which then
attempts to generate the correct registers through a mess of if statements,
we remove this function and instead call the appropriate register
generation functions directly. This follows the register generation model
used for earlier cpu models (pre-970) whereby cpu specific registers are
generated directly in the init_proc function and makes it easier to
add/remove specific registers for new cpu models.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
9861bb3efd target/ppc: Add patb_entry to sPAPRMachineState
ISA v3.00 adds the idea of a partition table which is used to store the
address translation details for all partitions on the system. The partition
table consists of double word entries indexed by partition id where the second
double word contains the location of the process table in guest memory. The
process table is registered by the guest via a h-call.

We need somewhere to store the address of the process table so we add an entry
to the sPAPRMachineState struct called patb_entry to represent the second
doubleword of a single partition table entry corresponding to the current
guest. We need to store this value so we know if the guest is using radix or
hash translation and the location of the corresponding process table in guest
memory. Since we only have a single guest per qemu instance, we only need one
entry.

Since the partition table is technically a hypervisor resource we require that
access to it is abstracted by the virtual hypervisor through the get_patbe()
call. Currently the value of the entry is never set (and thus
defaults to 0 indicating hash), but it will be required to both implement
POWER9 kvm support and tcg radix support.

We also add this field to be migrated as part of the sPAPRMachineState as we
will need it on the receiving side as the guest will never tell us this
information again and we need it to perform translation.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
David Gibson
0922f1e487 target/ppc/POWER9: Add POWERPC_MMU_V3 bit
For easier handling of future processors using the POWER9 or something
close to it, add a new bit in the MMU model.  This was originally from a
revised version of 86cf1e9 "target/ppc/POWER9: Add ISAv3.00 MMU definition"
but the older version of the patch was already merged.  This makes the
change on top of the original version.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
David Gibson
eaa477ca4e powernv: Don't test POWER9 CPU yet
A couple of tests for the work-in-progress 'powernv' machine type attempt
to test on POWER9 CPUs.  However the POWER9 CPU support is incomplete and
this doesn't really work.  In particular the firmware image we have
currently assumes the presence of the SDR1 register, which no longer exists
on POWER9.  We only got away with this so far, because of a different bug
which added SDR1 to POWER9 even though it shouldn't be there.

For now, remove POWER9 testing of powernv, POWER8 testing will do for now
until the POWER9 support is more complete.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Alexey Kardashevskiy
9c60766887 exec, kvm, target-ppc: Move getrampagesize() to common code
getrampagesize() returns the largest supported page size and mainly
used to know if huge pages are enabled.

However is implemented in target-ppc/kvm.c and not available
in TCG or other architectures.

This renames and moves gethugepagesize() to mmap-alloc.c where
fd-based analog of it is already implemented. This renames and moves
getrampagesize() to exec.c as it seems to be the common place for
helpers like this.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Suraj Jitindar Singh
9b44c836dc target/ppc: Add POWER9/ISAv3.00 to compat_table
compat_table contains the list of logical pvr compat modes which a cpu can
operate in. It is a list of struct CompatInfo which contains the given pvr
value for a compat mode, the pcr bits which should be set to operate in
that compat mode, the pcr level which must be present in pcr_supported for
a processor to support that compat mode and the max threads possible in
that compat mode.

Add an entry for the POWER9/ISAv3.00 logical pvr which represents a
processor running with support for logical pvr 0x0f000005. A processor
running in this mode should have PCR_COMPAT_3_00 set in the pcr (if
available in pcr_mask) and should have PCR_COMPAT_3_00 in pcr_supported
to indicate that it is capable of running in this compat mode.

Also add PCR_COMPAT_3_00 to the bits which must be set for all previous
compat modes. Since no processor models contain this bit yet in pcr_mask
it will never be set, but this ensures we don't forget to in the future.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-03 11:30:59 +11:00
Peter Maydell
54639aed97 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Thu 02 Mar 2017 03:42:59 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  block/rbd: add support for 'mon_host', 'auth_supported' via QAPI
  block/rbd: add blockdev-add support
  block/rbd: parse all options via bdrv_parse_filename
  block/rbd: add all the currently supported runtime_opts
  block/rbd: don't copy strings in qemu_rbd_next_tok()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 23:20:37 +00:00
Peter Maydell
ecb24d334a Merge remote-tracking branch 'remotes/rth/tags/pull-tgt-20170302' into staging
Queued sparc patch

# gpg: Signature made Wed 01 Mar 2017 19:53:21 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tgt-20170302:
  target/sparc: Restore ldstub of odd asis

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 22:06:41 +00:00
Peter Maydell
6835504887 Merge remote-tracking branch 'remotes/kraxel/tags/pull-audio-20170301-1' into staging
audio: replay support, sdl2 fix.

# gpg: Signature made Wed 01 Mar 2017 15:38:09 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-audio-20170301-1:
  audio/sdlaudio: Allow audio playback with SDL2
  audio: make audio poll timer deterministic
  replay: add record/replay for audio passthrough

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 20:31:49 +00:00
Peter Maydell
b49d31a05a Merge remote-tracking branch 'remotes/kraxel/tags/pull-docs-20170301-1' into staging
docs: update sample configuration files

# gpg: Signature made Wed 01 Mar 2017 13:43:34 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-docs-20170301-1:
  mach-virt: Provide sample configuration files
  q35: Improve sample configuration files

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 19:27:30 +00:00
Peter Maydell
251501a371 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170228a' into staging
Migration pull

Note: The 'postcopy: Update userfaultfd.h header' is part of
Paolo's header update and will disappear if applied after it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Tue 28 Feb 2017 12:38:34 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20170228a: (27 commits)
  postcopy: Add extra check for COPY function
  postcopy: Add doc about hugepages and postcopy
  postcopy: Check for userfault+hugepage feature
  postcopy: Update userfaultfd.h header
  postcopy: Allow hugepages
  postcopy: Send whole huge pages
  postcopy: Mask fault addresses to huge page boundary
  postcopy: Load huge pages in one go
  postcopy: Use temporary for placing zero huge pages
  postcopy: Plumb pagesize down into place helpers
  postcopy: Record largest page size
  postcopy: enhance ram_block_discard_range for hugepages
  exec: ram_block_discard_range
  postcopy: Chunk discards for hugepages
  postcopy: Transmit and compare individual page sizes
  postcopy: Transmit ram size summary word
  migration: fix use-after-free of to_dst_file
  migration: Update docs to discourage version bumps
  migration: fix id leak regression
  migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 17:39:12 +00:00
Peter Maydell
c9fc677a35 Merge remote-tracking branch 'remotes/elmarco/tags/leak-pull-request' into staging
# gpg: Signature made Wed 01 Mar 2017 09:02:53 GMT
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/leak-pull-request: (28 commits)
  tests: fix virtio-blk-test leaks
  tests: add specialized device_find function
  tests: fix usb-test leaks
  tests: allows to run single test in usb-hcd-ehci-test
  usb: release the created buses
  bus: do not unref hotplug handler
  tests: fix virtio-9p-test leaks
  tests: fix virtio-scsi-test leak
  tests: fix e1000e leaks
  tests: fix i440fx-test leaks
  tests: fix e1000-test leak
  tests: fix tco-test leaks
  tests: fix eepro100-test leak
  pc: pcihp: avoid adding ACPI_PCIHP_PROP_BSEL twice
  tests: fix ipmi-bt-test leak
  tests: fix ipmi-kcs-test leak
  tests: fix bios-tables-test leak
  tests: fix hd-geo-test leaks
  tests: fix ide-test leaks
  tests: fix vhost-user-test leaks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 15:25:37 +00:00
Peter Maydell
ab711e216b Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170301' into staging
ppc patch queue for 2017-03-01

I was hoping to get this pull request squeezed in before the soft
freeze, but I ran into some difficulties during testing.  Everything
here was at least posted before the soft freeze, so I'm hoping we can
still merge it for 2.9.

The biggest things here are:
    * Cleanups to handling of hashed page tables, that will make
      adding support for the POWER9 MMU easier
    * Cleanups to the XICS interrupt controller that will make
      implementing the powernv machine easier
    * TCG implementation of extended overflow and carry handling for
      POWER9

It also includes:
    * Increasing the CPU limit for pseries to 1024 vCPUs
    * Generating proper OF node names in qemu (making hotplug and
      coldplug logic closer together)

# gpg: Signature made Wed 01 Mar 2017 04:43:06 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170301: (50 commits)
  Add PowerPC 32-bit guest memory dump support
  ppc/xics: rename 'ICPState *' variables to 'icp'
  ppc/xics: move InterruptStatsProvider to the sPAPR machine
  ppc/xics: move ics-simple post_load under the machine
  ppc/xics: remove the XICSState classes
  ppc/xics: export the XICS init routines
  ppc/xics: move the ICP array under the sPAPR machine
  ppc/xics: register the reset handler of ICP objects
  ppc/xics: simplify spapr_dt_xics() interface
  ppc/xics: use the QOM interface to grab an ICP
  ppc/xics: move the cpu_setup() handler under the ICPState class
  ppc/xics: simplify the cpu_setup() handler
  ppc/xics: move kernel_xics_fd out of KVMXICSState
  ppc/xics: extend the QOM interface to handle ICPs
  ppc/xics: remove the XICS list of ICS
  ppc/xics: register the reset handler of ICS objects
  ppc/xics: remove xics_find_source()
  ppc/xics: use the QOM interface to resend irqs
  ppc/xics: use the QOM interface to get irqs
  ppc/xics: use the QOM interface under the sPAPR machine
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 13:50:55 +00:00
Peter Maydell
4bc0d39a2f Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Tue 28 Feb 2017 22:09:11 GMT
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to 0cd97cc built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 12:30:11 +00:00
Peter Maydell
666095c852 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 queue, 2017-02-27

"-cpu max" and query-cpu-model-expansion support for x86. This
should be the last x86 pull request before 2.9 soft freeze.

# gpg: Signature made Mon 27 Feb 2017 16:24:15 GMT
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  i386: Improve query-cpu-model-expansion full mode
  i386: Implement query-cpu-model-expansion QMP command
  i386: Define static "base" CPU model
  i386: Don't set CPUClass::cpu_def on "max" model
  i386: Make "max" model not use any host CPUID info on TCG
  i386: Create "max" CPU model
  qapi-schema: Comment about full expansion of non-migration-safe models
  i386: Reorganize and document CPUID initialization steps
  i386: Rename X86CPU::host_features to X86CPU::max_features
  i386: Add ordering field to CPUClass
  i386: Unset cannot_destroy_with_object_finalize_yet on "host" model

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 11:18:01 +00:00
Peter Maydell
f1d640524d Merge remote-tracking branch 'remotes/kraxel/tags/pull-seabios-20170228-1' into staging
seabios: update to 1.10.2 release

# gpg: Signature made Tue 28 Feb 2017 08:57:57 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-seabios-20170228-1:
  seabios: update to 1.10.2 release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 10:17:45 +00:00
Peter Maydell
d377b80338 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170301' into staging
Queued TCG patch

# gpg: Signature made Tue 28 Feb 2017 21:30:32 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170301:
  aarch64: Change ext type to TCGType to fix warnings

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-02 08:35:14 +00:00
Marcel Apfelbaum
077dd74239 hw/pxb-pcie: fix PCI Express hotplug support
Add the missing osc method for pxb-pcie devices as APCI spec recommends,
see 6.2.9.1 OSC Implementation Example for PCI Host Bridge Devices, ACPI 3.0a:

    It is recommended that a machine with multiple host bridge devices
    should report the same capabilities for all host bridges, and also
    negotiate control of the features described in the Control Field in
    the same way for all host bridges.

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:31:26 +02:00
Michael S. Tsirkin
5cb206b58d tests/acpi: update DSDT after last patch
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:31:26 +02:00
Michael S. Tsirkin
b3c782db20 acpi: simplify _OSC
Our _OSC method has a bunch of unused code loading data
into external CTRL and SUPP fields which are then never
used. Drop this in favor of a single local variable.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2017-03-02 07:29:10 +02:00
Jason Wang
96a8821d21 virtio: unbreak virtio-pci with IOMMU after caching ring translations
Commit c611c76417 ("virtio: add MemoryListener to cache ring
translations") registers a memory listener to dma_as. This may not
work when IOMMU is enabled: dma_as(bus_master_as) were initialized in
pcibus_machine_done() after virtio_realize(). This will cause a
segfault. Fixing this by using pci_device_iommu_address_space()
instead to make sure address space were initialized at this time.

With this fix, IOMMU device were required to be initialized before any
virtio-pci devices.

Fixes: c611c76417 ("virtio: add MemoryListener to cache ring translations")
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:14:28 +02:00
Stefan Hajnoczi
874adf45db virtio: add missing region cache init in virtio_load()
Commit 97cd965c07 ("virtio: use
VRingMemoryRegionCaches for avail and used rings") switched to a memory
region cache to avoid repeated map/unmap operations.

The virtio_load() process is a little tricky because vring addresses are
serialized in two separate places.  VIRTIO 1.0 devices serialize desc
and then a subsection with used and avail.  Legacy devices only
serialize desc.

Live migration of VIRTIO 1.0 devices fails on the destination host with:

  VQ 0 size 0x80 < last_avail_idx 0x12f8 - used_idx 0x0
  Failed to load virtio-blk:virtio
  error while loading state for instance 0x0 of device '0000:00:04.0/virtio-blk'

This happens because the memory region cache is only initialized after
desc is loaded and not after the used and avail subsection is loaded.
If the guest chose memory addresses that don't match the legacy ring
layout then the wrong guest memory location is accessed.

Wait until all ring addresses are known before trying to initialize the
region cache.  Also clarify the incomplete comment about VIRTIO-1 ring
address subsection.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
2017-03-02 07:14:28 +02:00
Stefan Hajnoczi
3cdf847329 virtio: invalidate memory in vring_set_avail_event()
Remember to invalidate the avail event field so the memory pages are
marked dirty.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
2017-03-02 07:14:27 +02:00
Cornelia Huck
34c6bf22a8 virtio: guard vring access when setting notification
Switching to vring caches exposed an existing bug in
virtio_queue_set_notification(): We can't access vring structures
if they have not been set up yet. This may happen, for example,
for virtio-blk devices with multiple queues: The code will try to
switch notifiers for every queue, but the guest may have only set up
a subset of them.

Fix this by guarding access to the vring memory by checking for
vring.desc. The first aio poll will iron out any remaining
inconsistencies for later-configured queues (buggy legacy drivers).

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:14:27 +02:00
Paolo Bonzini
dd3dd4ba7b virtio: check for vring setup in virtio_queue_empty
If the vring has not been set up, there is nothing in the virtqueue.
virtio_queue_host_notifier_aio_poll calls virtio_queue_empty even in
this case; we have to filter it out just like virtio_queue_notify_aio_vq.

Reported-by: Gerd Hoffmann <kraxel@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-03-02 07:14:27 +02:00
Ben Warren
42697d88de MAINTAINERS: Add VM Generation ID entries
Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2017-03-02 07:14:27 +02:00
Ben Warren
3248f1b4e0 tests: Move reusable ACPI code into a utility file
Also usable by upcoming VM Generation ID tests

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2017-03-02 07:14:27 +02:00
Igor Mammedov
39164c136c qmp/hmp: add query-vm-generation-id and 'info vm-generation-id' commands
Add commands to query Virtual Machine Generation ID counter.

QMP command example:
    { "execute": "query-vm-generation-id" }

HMP command example:
    info vm-generation-id

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:14:27 +02:00
Ben Warren
d03637bcfb ACPI: Add Virtual Machine Generation ID support
This implements the VM Generation ID feature by passing a 128-bit
GUID to the guest via a fw_cfg blob.
Any time the GUID changes, an ACPI notify event is sent to the guest

The user interface is a simple device with one parameter:
 - guid (string, must be "auto" or in UUID format
   xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:14:27 +02:00
Ben Warren
c7809e6cd7 ACPI: Add vmgenid blob storage to the build tables
This allows them to be centrally initialized and destroyed

The "AcpiBuildTables.vmgenid" array will be used to construct the
"etc/vmgenid_guid" fw_cfg blob.

Its contents will be linked into fw_cfg after being built on the
pc_machine_done() -> acpi_setup() -> acpi_build() call path, and dropped
without use on the subsequent, guest triggered, acpi_build_update() ->
acpi_build() call path.

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:14:26 +02:00
Ben Warren
20f5d14dc9 docs: VM Generation ID device description
This patch is based off an earlier version by
Gal Hammer (ghammer@redhat.com)

Requirements section, ASCII diagrams and overall help
provided by Laszlo Ersek (lersek@redhat.com)

Signed-off-by: Gal Hammer <ghammer@redhat.com>
Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:14:26 +02:00
Ben Warren
489886d118 linker-loader: Add new 'write pointer' command
This is similar to the existing 'add pointer' functionality, but instead
of instructing the guest (BIOS or UEFI) to patch memory, it instructs
the guest to write the pointer back to QEMU via a writeable fw_cfg file.

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-03-02 07:14:26 +02:00
Jeff Cody
0a55679b4a block/rbd: add support for 'mon_host', 'auth_supported' via QAPI
This adds support for three additional options that may be specified
by QAPI in blockdev-add:

    server: host, port
    auth method: either 'cephx' or 'none'

The "server" and "auth-supported" QAPI parameters are arrays.  To conform
with the rados API, the array items are join as a single string with a ';'
character as a delimiter when setting the configuration values.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-03-01 22:39:25 -05:00
Peter Maydell
b9fe31392b Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Tue 28 Feb 2017 20:35:32 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (46 commits)
  block: Add Error parameter to bdrv_append()
  block: Add Error parameter to bdrv_set_backing_hd()
  block: Assertions for resize permission
  block: Assertions for write permissions
  block: Pass BdrvChild to bdrv_aligned_preadv/pwritev and copy-on-read
  tests: Remove FIXME comments
  nbd/server: Use real permissions for NBD exports
  migration/block: Use real permissions
  hmp: Request permissions in qemu-io
  commit: Add filter-node-name to block-commit
  mirror: Add filter-node-name to blockdev-mirror
  stream: Use real permissions in streaming block job
  mirror: Use real permissions in mirror/active commit block job
  blockjob: Factor out block_job_remove_all_bdrv()
  block: Allow backing file links in change_parent_backing_link()
  block: BdrvChildRole.attach/detach() callbacks
  block: Fix pending requests check in bdrv_append()
  backup: Use real permissions in backup block job
  commit: Use real permissions for HMP 'commit'
  commit: Use real permissions in commit block job
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 23:09:46 +00:00
Peter Maydell
1e0addb682 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20170228-tag' into staging
Xen 2017/02/28

# gpg: Signature made Tue 28 Feb 2017 19:13:08 GMT
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <sstabellini@kernel.org>"
# gpg:                 aka "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20170228-tag:
  Add a new qmp command to do checkpoint, query xen replication status
  Add a new qmp command to start/stop replication

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 20:33:47 +00:00
Richard Henderson
3db010c339 target/sparc: Restore ldstub of odd asis
Fixes the booting of ss20 roms.

Cc: qemu-stable@nongnu.org
Reported-by: Michael Russo <mike@papersolve.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-03-02 06:52:43 +11:00
Peter Maydell
b28f9db1a7 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170228-1' into staging
target-arm queue:
 * raspi2: add gpio controller and sdhost controller, with
   the wiring so the guest can switch which controller the
   SD card is attached to
   (this is sufficient to get raspbian kernels to boot)
 * GICv3: support state save/restore from KVM
 * update Linux headers to 4.11
 * refactor and QOMify the ARMv7M container object

# gpg: Signature made Tue 28 Feb 2017 17:11:49 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170228-1: (21 commits)
  bcm2835: add sdhost and gpio controllers
  bcm2835_gpio: add bcm2835 gpio controller
  hw/sd: add card-reparenting function
  qdev: Have qdev_set_parent_bus() handle devices already on a bus
  hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers
  target-arm: Add GICv3CPUState in CPUARMState struct
  hw/intc/arm_gicv3_kvm: Implement get/put functions
  hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
  update Linux headers to 4.11
  update-linux-headers: update for 4.11
  stm32f205: Rename 'nvic' local to 'armv7m'
  stm32f205: Create armv7m object without using armv7m_init()
  armv7m: Split systick out from NVIC
  armv7m: Don't put core v7M devices under CONFIG_STELLARIS
  armv7m: Make bitband device take the address space to access
  armv7m: Make NVIC expose a memory region rather than mapping itself
  armv7m: Make ARMv7M object take memory region link
  armv7m: Use QOMified armv7m object in armv7m_init()
  armv7m: QOMify the armv7m container
  armv7m: Move NVICState struct definition into header
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 17:58:54 +00:00
Thomas Huth
bcf19777df audio/sdlaudio: Allow audio playback with SDL2
When compiling with SDL2, the semaphore trick used in sdlaudio.c
does not work - QEMU locks up completely in this case. To avoid
the hang and get at least some audio playback up and running (it's
a little bit crackling, but better than nothing), we can use the
SDL locking functions SDL_LockAudio() and SDL_UnlockAudio() to sync
with the sound playback thread instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1485852398-2327-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
1ffc266539 audio: make audio poll timer deterministic
This patch changes resetting strategy of the audio polling timer.
It does not change expiration time if the timer is already set.
This patch is needed to make this timer deterministic and to use execution
record/replay for audio devices.

audio_reset_timer is used in the function audio_vm_change_state_handler.
Therefore every time VM is stopped or restarted the timer will be reset
to new timeout. Virtual clock does not proceed while VM is stopped.
Therefore there is no need in resetting the timeout when VM restarts.

v2: updated commit message
v3: now using timer_mod_anticipate function (as suggested by Yurii Zubrytskyi)

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170214071510.6112.76764.stgit@PASHA-ISP
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
3d4d16f4dc replay: add record/replay for audio passthrough
This patch adds recording and replaying audio data. Is saves synchronization
information for audio out and inputs from the microphone.

v2: removed unneeded whitespace change

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170202055054.4848.94901.stgit@PASHA-ISP.lan02.inno

[ kraxel: add qemu/error-report.h include to fix osx build failure ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:11:44 +01:00
Peter Maydell
7287e3556f Merge remote-tracking branch 'remotes/gkurz/tags/cve-2016-9602-for-upstream' into staging
This pull request have all the fixes for CVE-2016-9602, so that it can
be easily picked up by downstreams, as suggested by Michel Tokarev.

# gpg: Signature made Tue 28 Feb 2017 10:21:32 GMT
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/cve-2016-9602-for-upstream: (28 commits)
  9pfs: local: drop unused code
  9pfs: local: open2: don't follow symlinks
  9pfs: local: mkdir: don't follow symlinks
  9pfs: local: mknod: don't follow symlinks
  9pfs: local: symlink: don't follow symlinks
  9pfs: local: chown: don't follow symlinks
  9pfs: local: chmod: don't follow symlinks
  9pfs: local: link: don't follow symlinks
  9pfs: local: improve error handling in link op
  9pfs: local: rename: use renameat
  9pfs: local: renameat: don't follow symlinks
  9pfs: local: lstat: don't follow symlinks
  9pfs: local: readlink: don't follow symlinks
  9pfs: local: truncate: don't follow symlinks
  9pfs: local: statfs: don't follow symlinks
  9pfs: local: utimensat: don't follow symlinks
  9pfs: local: remove: don't follow symlinks
  9pfs: local: unlinkat: don't follow symlinks
  9pfs: local: lremovexattr: don't follow symlinks
  9pfs: local: lsetxattr: don't follow symlinks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 13:53:20 +00:00
Andrea Bolognani
166d434685 mach-virt: Provide sample configuration files
These are very much like the sample configuration files
for q35, and can be used both as documentation and as
a starting point for creating your own guest.

Two sample configuration files are provided:

  * mach-virt-graphical.cfg can be used to start a
    fully-featured (USB, graphical console, etc.)
    guest that uses VirtIO devices;

  * mach-virt-serial.cfg is similar but has a minimal
    set of devices and uses the serial console.

All configuration files are fully commented and neatly
organized.

Signed-off-by: Andrea Bolognani <abologna@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1487326479-8664-3-git-send-email-abologna@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 14:40:40 +01:00
Andrea Bolognani
9ca019c1dd q35: Improve sample configuration files
Instead of having a single sample configuration file,
we now have several:

  * q35-emulated.cfg documents the default devices QEMU
    adds to a q35 guest and the additional devices that
    are pretty much guaranteed to be present in a
    physical q35-based machine;

  * q35-virtio-graphical.cfg can be used to start a
    fully-featured (USB, graphical console, audio, etc.)
    guest that uses VirtIO instead of emulated devices;

  * q35-virtio-serial.cfg is similar but has a minimal
    set of devices and uses the serial console.

All configuration files are fully commented and neatly
organized.

Signed-off-by: Andrea Bolognani <abologna@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Message-id: 1487326479-8664-2-git-send-email-abologna@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 14:40:40 +01:00
Peter Maydell
e3280ffbf5 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Tue 28 Feb 2017 12:40:00 GMT
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  .shippable: add s390x-cross target
  new: dockerfiles/debian-s390-cross

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-03-01 13:06:00 +00:00
Marc-André Lureau
80e1eea37a tests: fix virtio-blk-test leaks
Use qvirtio_pci_device_find_slot() to avoid leaking the non-hp
device. Add assert() to avoid further leaks in the future.

Use qvirtio_pci_device_free() to correctly free QVirtioPCIDevice.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:58:57 +04:00
Marc-André Lureau
c4523aae06 tests: add specialized device_find function
Allow specifying which slot to look for the device.

This will be used in the following patch to avoid leaking when multiple
devices exists and we want to lookup the hotplug one.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:57:04 +04:00
Marc-André Lureau
62030ed135 tests: fix usb-test leaks
Fix the usb tests leaks.

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
d3510ff9d7 tests: allows to run single test in usb-hcd-ehci-test
pci_init() shouldn't be a test function, but instead called before any
test. This allows to run a single test with -p /x86_64/ehci/....

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
cd7bc87868 usb: release the created buses
Leaks spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 11:51:29 +04:00
Marc-André Lureau
675f22c6d3 bus: do not unref hotplug handler
Apparently, none of the bus owner give a reference to the hotplug
handler property, do not unref it on bus release.

Furthermore, a bus is allowed to be its own hotplug handler, which can
be seen in qbus_set_bus_hotplug_handler() function. However, in this
case, the reference can't be given to the property, or this will create
a cyclic dependency and the bus will never be free.

Each bus owner should manage the lifecycle of the hotplug handler.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
2b880bcdbe tests: fix virtio-9p-test leaks
Spotted by ASAN.

Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
3caab54d08 tests: fix virtio-scsi-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
448fe3c134 tests: fix e1000e leaks
Spotted by ASAN.

This hunk adds an assertion. It checks that we're finding no more than
one e1000e device: each hit allocates, but there is only one g_free().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
1bab33ab4a tests: fix i440fx-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
8829e16f5e tests: fix e1000-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:28 +04:00
Marc-André Lureau
34779e8c39 tests: fix tco-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:51:25 +04:00
Marc-André Lureau
6afff1ffa3 tests: fix eepro100-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
2017-03-01 11:51:05 +04:00
Igor Mammedov
f0c9d64a68 pc: pcihp: avoid adding ACPI_PCIHP_PROP_BSEL twice
PCI hotplug for bridges was introduced only since 2.0 however
  acpi_set_bsel()->object_property_add_uint32_ptr(bus, ACPI_PCIHP_PROP_BSEL)
didn't take in account that for legacy mode (1.7) when
PCI hotplug for bridges is unavailable and ACPI_PCIHP_PROP_BSEL property
the only bus "PCI.0' has been created earlier at acpi_pcihp_init() time.

We managed to live with it only because of error rised by adding
a duplicate property in acpi_set_bsel() has been ignored which
resulted in useless leaking of just allocated (int)bus_bsel.

Issue affects only 1.7 machine type as ACPI tables supported by
QEMU were introduced at that time, but there wasn't PCI hotplug
for bridges till the next release (2.0).

Fix it by removing duplicate ACPI_PCIHP_PROP_BSEL intialization
in acpi_pcihp_init() and doing it only in one place acpi_set_pci_info().

PS:
do not ignore error returned by object_property_add_uint32_ptr()
and abort QEMU since it's programming error which should be fixed
instead of being ignored.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reported-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1470211497-116801-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[ Marc-André - Remove now unused ACPI_PCIHP_LEGACY_SIZE ]
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
2607b660e9 tests: fix ipmi-bt-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
3f5f5d04cd tests: fix ipmi-kcs-test leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 11:51:05 +04:00
Marc-André Lureau
f11dc27bcc tests: fix bios-tables-test leak
The inside array should be free too.
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
2c8f86961b tests: fix hd-geo-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
f5aa4bdc76 tests: fix ide-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-03-01 11:51:04 +04:00
Marc-André Lureau
0c0eb30260 tests: fix vhost-user-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:51:00 +04:00
Marc-André Lureau
fb6faea888 tests: fix q35-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 11:50:33 +04:00
Mike Nawrocki
356bb70ed1 Add PowerPC 32-bit guest memory dump support
This patch extends support for the `dump-guest-memory` command to the
32-bit PowerPC architecture. It relies on the assumption that a 64-bit
guest will not dump a 32-bit core file (and vice versa).

[dwg: I suspect this patch won't cover all cases, in particular a
32-bit machine type on a 64-bit qemu build.  However, it does strictly
more than what we had before, so might as well apply as a starting
point]

Signed-off-by: Mike Nawrocki <michael.nawrocki@gtri.gatech.edu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:53:58 +11:00
Cédric Le Goater
8e4fba203e ppc/xics: rename 'ICPState *' variables to 'icp'
'ICPState *' variables are currently named 'ss'. This is confusing, so
let's give them an appropriate name: 'icp'.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
6449da4545 ppc/xics: move InterruptStatsProvider to the sPAPR machine
It provides a better monitor output of the ICP and ICS objects, else
the objects are printed out of order.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
a7ff1212e9 ppc/xics: move ics-simple post_load under the machine
The ICS object uses a post_load() handler which is implicitly relying
on the fact that the internal state of the ICS and ICP objects has
been restored but this is not guaranteed. So, let's move the code
under the post_load() handler of the machine where we know the objects
have been fully restored.

The icp_resend() handler of the XICSFabric QOM interface is also
removed as it is now obsolete.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
e6f7e110ee ppc/xics: remove the XICSState classes
The XICSState classes are not used anymore. They have now been fully
deprecated by the XICSFabric QOM interface. Do the cleanups.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
2192a9303d ppc/xics: export the XICS init routines
There is nothing left related to the XICS object in the realize
functions of the KVMXICSState and XICSState class. So adapt the
interfaces to call these routines directly from the sPAPR machine init
sequence.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
852ad27e14 ppc/xics: move the ICP array under the sPAPR machine
This is the last step to remove the XICSState abstraction and have the
machine hold all the objects related to interrupts : ICSs and ICPs.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
20147f2fce ppc/xics: register the reset handler of ICP objects
The reset of the ICP objects is currently handled by XICS but this can
be done for each individual ICP.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:40 +11:00
Cédric Le Goater
b0ec31290c ppc/xics: simplify spapr_dt_xics() interface
spapr_dt_xics() only needs the number of servers to build the device
tree nodes. Let's change the routine interface to reflect that.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b4f27d71e3 ppc/xics: use the QOM interface to grab an ICP
Also introduce a xics_icp_get() helper to simplify the changes.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
f023243432 ppc/xics: move the cpu_setup() handler under the ICPState class
The cpu_setup() handler is currently under the XICSState class but it
really belongs under ICPState as it is setting up an individual vCPU.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
bf50860d1b ppc/xics: simplify the cpu_setup() handler
The cpu_setup() handler currently takes a 'XICSState *' argument to
grab the kernel ICP file descriptor. This interface can be simplified
by using the 'xics' backlink of the ICP object.

This change is also required by subsequent patches which makes use of
the QOM interface for XICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
729f8a4f48 ppc/xics: move kernel_xics_fd out of KVMXICSState
The kernel ICP file descriptor is the only reason behind the
KVMXICSState class and it's in the way of more cleanups. Let's make it
a static for the moment and move forward.

If this is problem, we could use an attribute under the sPAPR machine
later on.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b2fc59aaf9 ppc/xics: extend the QOM interface to handle ICPs
Let's add two new handlers for ICPs. One is to get an ICP object from
a server number and a second is to resend the irqs when needed.

The icp_resend() handler is a temporary workaround needed by the
ics-simple post_load() handler. It will be removed when the post_load
portion can be done at the machine level.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
d114a66225 ppc/xics: remove the XICS list of ICS
This is not used anymore.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
c79b2fdd7b ppc/xics: register the reset handler of ICS objects
The reset of the ICS objects is currently handled by XICS but this can
be done for each individual ICS. This also reduces the use of the XICS
list of ICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
be1fe35199 ppc/xics: remove xics_find_source()
It is not used anymore now that we have the QOM interface for XICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
2cd908d0ad ppc/xics: use the QOM interface to resend irqs
Also change the ICPState 'xics' backlink to be a XICSFabric, this
removes the need of using qdev_get_machine() to get the QOM interface
in some of the routines.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
f7759e4331 ppc/xics: use the QOM interface to get irqs
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
7844e12b28 ppc/xics: use the QOM interface under the sPAPR machine
Add 'ics_get' and 'ics_resend' handlers to the sPAPR machine. These
are relatively simple for a single ICS.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
51b180051e ppc/xics: introduce a XICSFabric QOM interface to handle ICSs
This interface provides two simple handlers. One is to get an ICS
(Interrupt Source Controller) object from an irq number and a second
to resend the irqs when needed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
b9038e7806 ppc/xics: add an InterruptStatsProvider interface to ICS and ICP objects
This is, again, to reduce the use of the list of ICS objects. Let's
make each individual ICS and ICP object an InterruptStatsProvider and
remove this same interface from XICSState.

The InterruptStatsProvider will be moved at the machine level after
the XICS cleanups are completed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
681bfaded6 ppc/xics: store the ICS object under the sPAPR machine
A list of ICS objects was introduced under the XICS object for the
PowerNV machine but, for the sPAPR machine, it brings extra complexity
as there is only a single ICS. To simplify the code, let's add the ICS
pointer under the sPAPR machine and try to reduce the use of this list
where possible.

Also, change the xics_spapr_*() routines to use an ICS object instead
of an XICSState and change their name to reflect that these are
specific to the sPAPR ICS object.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
817bb6a446 ppc/xics: remove set_nr_servers() handler from XICSStateClass
Today, the ICP (Interrupt Controller Presenter) objects are created by
the 'nr_servers' property handler of the XICS object and a class
handler. They are realized in the XICS object realize routine.

Let's simplify the process by creating the ICP objects along with the
XICS object at the machine level.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Cédric Le Goater
4e4169f7a2 ppc/xics: remove set_nr_irqs() handler from XICSStateClass
Today, the ICS (Interrupt Controller Source) object is created and
realized by the init and realize routines of the XICS object, but some
of the parameters are only known at the machine level.

These parameters are passed from the sPAPR machine to the ICS object
in a rather convoluted way using property handlers and a class handler
of the XICS object. The number of irqs required to allocate the IRQ
state objects in the ICS realize routine is one of them.

Let's simplify the process by creating the ICS object along with the
XICS object at the machine level and link the ICS into the XICS list
of ICSs at this level also. In the sPAPR machine, there is only a
single ICS but that will change with the PowerNV machine.

Also, QOMify the creation of the objects and get rid of the
superfluous code.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
738d5db824 xics: XICS should not be a SysBusDevice
Currently xics - the component of the IBM POWER interrupt controller
representing the overall interrupt fabric / architecture is
represented as a descendent of SysBusDevice.  However, this is not
really correct - the xics presents nothing in MMIO space so it should
be an "unattached" device in the current QOM model.

Since this device will always be created by the machine type, not created
specifically from the command line, and because it has no migrated state
it should be safe to move it around the device composition tree.

Therefore this patch changes it to a descendent of TYPE_DEVICE, and
makes it an unattached device.  So that its reset handler still gets
called correctly, we add a qdev_set_parent_bus() to attach it to
sysbus.  It's not really clear that's correct (instead of using
register_reset()) but it appears to a common technique.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
[clg corrected problems with reset]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
[dwg folded together and updated commit message]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Greg Kurz
a8eeafda19 spapr/pci: populate PCI DT in reverse order
Since commit 1d2d974244 "spapr_pci: enumerate and add PCI device tree", QEMU
populates the PCI device tree in the opposite order compared to SLOF.

Before 1d2d974244:

Populating /pci@800000020000000
                     00 0000 (D) : 1af4 1000    virtio [ net ]
                     00 0800 (D) : 1af4 1001    virtio [ block ]
                     00 1000 (D) : 1af4 1009    virtio [ network ]
Populating /pci@800000020000000/unknown-legacy-device@2

7e5294b8 :  /pci@800000020000000
7e52b998 :  |-- ethernet@0
7e52c0c8 :  |-- scsi@1
7e52c7e8 :  +-- unknown-legacy-device@2 ok

Since 1d2d974244:

Populating /pci@800000020000000
                     00 1000 (D) : 1af4 1009    virtio [ network ]
Populating /pci@800000020000000/unknown-legacy-device@2
                     00 0800 (D) : 1af4 1001    virtio [ block ]
                     00 0000 (D) : 1af4 1000    virtio [ net ]

7e5e8118 :  /pci@800000020000000
7e5ea6a0 :  |-- unknown-legacy-device@2
7e5eadb8 :  |-- scsi@1
7e5eb4d8 :  +-- ethernet@0 ok

This behaviour change is not actually a bug since no assumptions should be
made on DT ordering. But it has no real justification either, other than
being the consequence of the way fdt_add_subnode() inserts new elements
to the front of the FDT rather than adding them to the tail.

This patch reverts to the historical SLOF ordering by walking PCI devices
in reverse order. This reconciles pseries with x86 machine types behavior.
It is expected to make things easier when porting existing applications to
power.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
(slight update to the changelog)
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
b63d043418 target/ppc: add mcrxrx instruction
mcrxrx: Move to CR from XER Extended

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
c44027ffb9 target/ppc: add ov32 flag in divide operations
Add helper_div_compute_ov() in the int_helper for updating the overflow
flags.

For Divide Word:
SO, OV, and OV32 bits reflects overflow of the 32-bit result

For Divide DoubleWord:
SO, OV, and OV32 bits reflects overflow of the 64-bit result

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
61aa9a697a target/ppc: add ov32 flag for multiply low insns
For Multiply Word:
SO, OV, and OV32 bits reflects overflow of the 32-bit result

For Multiply DoubleWord:
SO, OV, and OV32 bits reflects overflow of the 64-bit result

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
1480d71cbe target/ppc: use tcg ops for neg instruction
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
dc0ad84449 target/ppc: update overflow flags for add/sub
* SO and OV reflects overflow of the 64-bit result in 64-bit mode and
  overflow of the low-order 32-bit result in 32-bit mode

* OV32 reflects overflow of the low-order 32-bit independent of the mode

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
33903d0aa4 target/ppc: update ca32 in arithmetic substract
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
6b10d008a0 target/ppc: update ca32 in arithmetic add
Adds routine to compute ca32 - gen_op_arith_compute_ca32

For 64-bit mode use the compute ca32 routine. While for 32-bit mode, CA
and CA32 will have same value.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
dd09c36159 target/ppc: support for 32-bit carry and overflow
POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
and corresponding defines.

Moreover, CA32 is updated when CA is updated and OV32 is updated when OV
is updated.

Arithmetic instructions:
    * Addition and Substractions:

        addic, addic., subfic, addc, subfc, adde, subfe, addme, subfme,
        addze, and subfze always updates CA and CA32.

        => CA reflects the carry out of bit 0 in 64-bit mode and out of
           bit 32 in 32-bit mode.
        => CA32 reflects the carry out of bit 32 independent of the
           mode.

        => SO and OV reflects overflow of the 64-bit result in 64-bit
           mode and overflow of the low-order 32-bit result in 32-bit
           mode
        => OV32 reflects overflow of the low-order 32-bit independent of
           the mode

    * Multiply Low and Divide:

        For mulld, divd, divde, divdu and divdeu: SO, OV, and OV32 bits
        reflects overflow of the 64-bit result

        For mullw, divw, divwe, divwu and divweu: SO, OV, and OV32 bits
        reflects overflow of the 32-bit result

     * Negate with OE=1 (nego)

       For 64-bit mode if the register RA contains
       0x8000_0000_0000_0000, OV and OV32 are set to 1.

       For 32-bit mode if the register RA contains 0x8000_0000, OV and
       OV32 are set to 1.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
e78308fd39 target/ppc: Correct SDR1 masking
SDR_64_HTABORG, which indicates the bits of the SDR1 register to use for
the base of a 64-bit machine's hashed page table (HPT) isn't correct.  It
includes the top 46 bits of the register, but in fact the top 4 bits must
be zero (according to the ISA v2.07).  No actual implementation has
supported close to 2^60 bytes of physical address space, so it's kind of
irrelevant, but we might as well correct this.

In addition, although we checked for bad size values in SDR1, we never
reported an error if entirely invalid bits were set there.  Add this check
to ppc_store_sdr1().

Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Suraj Jitindar Singh
8d63351f9f target/ppc: Remove the function ppc_hash64_set_sdr1()
The function ppc_hash64_set_sdr1 basically checked the htabsize and set an
error if it was too big, otherwise it just stored the value in SPR_SDR1.

Given that the only function which calls ppc_hash64_set_sdr1() is
ppc_store_sdr1(), why not handle the checking in ppc_store_sdr1() to avoid
the extra function call. Note that ppc_store_sdr1() already stores the
value in SPR_SDR1 anyway, so we were doing it twice.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
[dwg: Remove unnecessary error temporary]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
e57ca75ce3 target/ppc: Manage external HPT via virtual hypervisor
The pseries machine type implements the behaviour of a PAPR compliant
hypervisor, without actually executing such a hypervisor on the virtual
CPU.  To do this we need some hooks in the CPU code to make hypervisor
facilities get redirected to the machine instead of emulated internally.

For hypercalls this is managed through the cpu->vhyp field, which points
to a QOM interface with a method implementing the hypercall.

For the hashed page table (HPT) - also a hypervisor resource - we use an
older hack.  CPUPPCState has an 'external_htab' field which when non-NULL
indicates that the HPT is stored in qemu memory, rather than within the
guest's address space.

For consistency - and to make some future extensions easier - this merges
the external HPT mechanism into the vhyp mechanism.  Methods are added
to vhyp for the basic operations the core hash MMU code needs: map_hptes()
and unmap_hptes() for reading the HPT, store_hpte() for updating it and
hpt_mask() to retrieve its size.

To match this, the pseries machine now sets these vhyp fields in its
existing vhyp class, rather than reaching into the cpu object to set the
external_htab field.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
36778660d7 target/ppc: Eliminate htab_base and htab_mask variables
CPUPPCState includes fields htab_base and htab_mask which store the base
address (GPA) and size (as a mask) of the guest's hashed page table (HPT).
These are set when the SDR1 register is updated.

Keeping these in sync with the SDR1 is actually a little bit fiddly, and
probably not useful for performance, since keeping them expands the size of
CPUPPCState.  It also makes some upcoming changes harder to implement.

This patch removes these fields, in favour of calculating them directly
from the SDR1 contents when necessary.

This does make a change to the behaviour of attempting to write a bad value
(invalid HPT size) to the SDR1 with an mtspr instruction.  Previously, the
bad value would be stored in SDR1 and could be retrieved with a later
mfspr, but the HPT size as used by the softmmu would be, clamped to the
allowed values.  Now, writing a bad value is treated as a no-op.  An error
message is printed in both new and old versions.

I'm not sure which behaviour, if either, matches real hardware.  I don't
think it matters that much, since it's pretty clear that if an OS writes
a bad value to SDR1, it's not going to boot.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2017-03-01 11:23:39 +11:00
David Gibson
7222b94a83 target/ppc: Cleanup HPTE accessors for 64-bit hash MMU
Accesses to the hashed page table (HPT) are complicated by the fact that
the HPT could be in one of three places:
   1) Within guest memory - when we're emulating a full guest CPU at the
      hardware level (e.g. powernv, mac99, g3beige)
   2) Within qemu, but outside guest memory - when we're emulating user and
      supervisor instructions within TCG, but instead of emulating
      the CPU's hypervisor mode, we just emulate a hypervisor's behaviour
      (pseries in TCG or KVM-PR)
   3) Within the host kernel - a pseries machine using KVM-HV
      acceleration.  Mostly accesses to the HPT are handled by KVM,
      but there are a few cases where qemu needs to access it via a
      special fd for the purpose.

In order to batch accesses to the fd in case (3), we use a somewhat awkward
ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case
(3) reads / releases several HPTEs from the kernel as a batch (usually a
whole PTEG).  For cases (1) & (2) it just returns an address value.  The
actual HPTE load helpers then need to interpret the returned token
differently in the 3 cases.

This patch keeps the same basic structure, but simplfiies the details.
First start_access() / stop_access() are renamed to map_hptes() and
unmap_hptes() to make their operation more obvious.  Second, map_hptes()
now always returns a qemu pointer, which can always be used in the same way
by the load_hpte() helpers.  In case (1) it comes from address_space_map()
in case (2) directly from qemu's HPT buffer and in case (3) from a
temporary buffer read from the KVM fd.

While we're at it, make things a bit more consistent in terms of types and
variable names: avoid variables named 'index' (it shadows index(3) which
can lead to confusing results), use 'hwaddr ptex' for HPTE indices and
uint64_t for each of the HPTE words, use ptex throughout the call stack
instead of pte_offset in some places (we still need that at the bottom
layer, but nowhere else).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
David Gibson
7d6250e3d1 target/ppc: SDR1 is a hypervisor resource
At present the SDR1 register - the base of the system's hashed page table
(HPT) - is represented as an SPR with supervisor read and write permission.
However, on CPUs which have a hypervisor mode, the SDR1 is a hypervisor
only resource.  Change the permission checking on the SPR to reflect this.

Now that this is done, we don't need to check for an external HPT executing
mtsdr1: an external HPT only applies when we're emulating the behaviour of
a hypervisor, rather than modelling the CPU's hypervisor mode internally,
so if we're permitted to execute mtsdr1, we don't have an external HPT.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
b7b0b1f13a target/ppc: Merge cpu_ppc_set_vhyp() with cpu_ppc_set_papr()
cpu_ppc_set_papr() sets up various aspects of CPU state for use with PAPR
paravirtualized guests.  However, it doesn't set the virtual hypervisor,
so callers must also call cpu_ppc_set_vhyp() so that PAPR hypercalls are
handled properly.  This is a bit silly, so fold setting the virtual
hypervisor into cpu_ppc_set_papr().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
c6404adebf pseries: Minor cleanups to HPT management hypercalls
* Standardize on 'ptex' instead of 'pte_index' for HPTE index variables
   for consistency and brevity
 * Avoid variables named 'index'; shadowing index(3) from libc can lead to
   surprising bugs if the variable is removed, because compiler errors
   might not appear for remaining references
 * Clarify index calculations in h_enter() - we have two cases, H_EXACT
   where the exact HPTE slot is given, and !H_EXACT where we search for
   an empty slot within the hash bucket.  Make the calculation more
   consistent between the cases.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
2017-03-01 11:23:39 +11:00
David Gibson
1ad9f0a464 target/ppc: Fix KVM-HV HPTE accessors
When a 'pseries' guest is running with KVM-HV, the guest's hashed page
table (HPT) is stored within the host kernel, so it is not directly
accessible to qemu.  Most of the time, qemu doesn't need to access it:
we're using the hardware MMU, and KVM itself implements the guest
hypercalls for manipulating the HPT.

However, qemu does need access to the in-KVM HPT to implement
get_phys_page_debug() for the benefit of the gdbstub, and maybe for
other debug operations.

To allow this, 7c43bca "target-ppc: Fix page table lookup with kvm
enabled" added kvmppc_hash64_read_pteg() to target/ppc/kvm.c to read
in a batch of HPTEs from the KVM table.  Unfortunately, there are a
couple of problems with this:

First, the name of the function implies it always reads a whole PTEG
from the HPT, but in fact in some cases it's used to grab individual
HPTEs (which ends up pulling 8 HPTEs, not aligned to a PTEG from the
kernel).

Second, and more importantly, the code to read the HPTEs from KVM is
simply wrong, in general.  The data from the fd that KVM provides is
designed mostly for compact migration rather than this sort of one-off
access, and so needs some decoding for this purpose.  The current code
will work in some cases, but if there are invalid HPTEs then it will
not get sane results.

This patch rewrite the HPTE reading function to have a simpler
interface (just read n HPTEs into a caller provided buffer), and to
correctly decode the stream from the kernel.

For consistency we also clean up the similar function for altering
HPTEs within KVM (introduced in c138593 "target-ppc: Update
ppc_hash64_store_hpte to support updating in-kernel htab").

Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Greg Kurz
6244bb7e58 sysemu: support up to 1024 vCPUs
Some systems can already provide more than 255 hardware threads.

Bumping the QEMU limit to 1024 seems reasonable:
- it has no visible overhead in top;
- the limit itself has no effect on hot paths.

Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Nikunj A Dadhania
f32899de97 target/ppc: introduce helper_update_ov_legacy
Removes duplicate code and will be useful for consolidating flags

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Laurent Vivier
2530a1a5cf spapr: generate DT node names
When DT node names for PCI devices are generated by SLOF,
they are generated according to the type of the device
(for instance, ethernet for virtio-net-pci device).

Node name for hotplugged devices is generated by QEMU.
This patch adds the mechanic to QEMU to create the node
name according to the device type too.

The data structure has been roughly copied from OpenBIOS/OpenHackware,
node names from SLOF.

Example:

Hotplugging some PCI cards with QEMU monitor:

device_add virtio-tablet-pci
device_add virtio-serial-pci
device_add virtio-mouse-pci
device_add virtio-scsi-pci
device_add virtio-gpu-pci
device_add ne2k_pci
device_add nec-usb-xhci
device_add intel-hda

What we can see in linux device tree:

for dir in /proc/device-tree/pci@800000020000000/*@*/; do
    echo $dir
    cat $dir/name
    echo
done

WITHOUT this patch:

/proc/device-tree/pci@800000020000000/pci@0/
pci
/proc/device-tree/pci@800000020000000/pci@1/
pci
/proc/device-tree/pci@800000020000000/pci@2/
pci
/proc/device-tree/pci@800000020000000/pci@3/
pci
/proc/device-tree/pci@800000020000000/pci@4/
pci
/proc/device-tree/pci@800000020000000/pci@5/
pci
/proc/device-tree/pci@800000020000000/pci@6/
pci
/proc/device-tree/pci@800000020000000/pci@7/
pci

WITH this patch:

/proc/device-tree/pci@800000020000000/communication-controller@1/
communication-controller
/proc/device-tree/pci@800000020000000/display@4/
display
/proc/device-tree/pci@800000020000000/ethernet@5/
ethernet
/proc/device-tree/pci@800000020000000/input-controller@0/
input-controller
/proc/device-tree/pci@800000020000000/mouse@2/
mouse
/proc/device-tree/pci@800000020000000/multimedia-device@7/
multimedia-device
/proc/device-tree/pci@800000020000000/scsi@3/
scsi
/proc/device-tree/pci@800000020000000/usb-xhci@6/
usb-xhci

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:39 +11:00
Laurent Vivier
089f7e827d PCI: add missing classes in pci_ids.h to build device tree
To allow QEMU to add PCI entries in device tree,
we must have a more exhaustive list of PCI class IDs.

This patch synchronizes as much as possible with
pci_ids.h and add some missing IDs from SLOF.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Nikunj A Dadhania
1bd33d0d7c target/ppc: optimize gen_write_xer()
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Nikunj A Dadhania
00b7078831 target/ppc: move cpu_[read, write]_xer to cpu.c
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-03-01 11:23:38 +11:00
Mark Cave-Ayland
9b40d1ee13 Update OpenBIOS images to 0cd97cc built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2017-02-28 21:50:03 +00:00
Pranith Kumar
dc1eccd661 aarch64: Change ext type to TCGType to fix warnings
To fix the following warnings:

In file included from /users/pranith/qemu/tcg/tcg.c:255:
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:879:24: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_cmp(s, ext, a, b, b_const);
        ~~~~~~~~~~~    ^~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:893:36: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from macro 'tcg_out_insn'
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
                                                                ^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:895:37: warning: implicit conversion from enumeration type 'TCGMemOp' (aka 'enum TCGMemOp') to different enumeration type 'TCGType' (aka 'enum TCGType')
      [-Wenum-conversion]
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:389:65: note: expanded from macro 'tcg_out_insn'
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
                                                                ^
/users/pranith/qemu/tcg/aarch64/tcg-target.inc.c:1610:27: warning: implicit conversion from enumeration type 'TCGType' (aka 'enum TCGType') to different enumeration type 'TCGMemOp' (aka 'enum TCGMemOp')
      [-Wenum-conversion]
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        ~~~~~~~~~~~~~~    ^~~

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170217154311.13920-1-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-03-01 08:28:06 +11:00
Marc-André Lureau
f3f8e81150 tests: fix endianness-test leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
072bdb07c5 tests: fix ptimer leaks
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
461a862022 glib-compat: add g_test_add_data_func_full fallback
Move the fallback from qtest_add_data_func_full() to glib-compat.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
e703dcbaee timer: use an inline function for free
Similarly to allocation, do it from an inline function. This allows
tests to only use the headers for allocation/free of timer.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
14324f585d tests: fix leaks in test-io-channel-command
No need for strdup, fix leaks when socat is missing.

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: "Daniel P. Berrange" <berrange@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
dc491fead0 tests: fix qmp response leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-03-01 00:09:28 +04:00
Marc-André Lureau
fc34059f08 qtest: fix a memory leak
Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-03-01 00:09:28 +04:00
Peter Maydell
758af5e862 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20170228' into staging
Network boot for s390x. More information (and instructions
for building a s390-netboot.img) can be found at
http://wiki.qemu-project.org/Features/S390xNetworkBoot

# gpg: Signature made Tue 28 Feb 2017 11:27:18 GMT
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20170228:
  pc-bios/s390-ccw.img: rebuild image
  pc-bios/s390-ccw: Use the ccw bios to start the network boot
  s390x/ipl: Load network boot image
  s390x/ipl: Extend S390IPLState to support network boot
  elf-loader: Allow late loading of elf

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 19:52:26 +00:00
Kevin Wolf
b2c2832c61 block: Add Error parameter to bdrv_append()
Aborting on error in bdrv_append() isn't correct. This patch fixes it
and lets the callers handle failures.

Test case 085 needs a reference output update. This is caused by the
reversed order of bdrv_set_backing_hd() and change_parent_backing_link()
in bdrv_append(): When the backing file of the new node is set, the
parent nodes are still pointing to the old top, so the backing blocker
is now initialised with the node name rather than the BlockBackend name.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:51 +01:00
Kevin Wolf
12fa4af61f block: Add Error parameter to bdrv_set_backing_hd()
Not all callers of bdrv_set_backing_hd() know for sure that attaching
the backing file will be allowed by the permission system. Return the
error from the function rather than aborting.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:51 +01:00
Kevin Wolf
c8f6d58edb block: Assertions for resize permission
This adds an assertion that ensures that the necessary resize permission
has been granted before bdrv_truncate() is called.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
afa4b29323 block: Assertions for write permissions
This adds assertions that ensure that the necessary write permissions
have been granted before someone attempts to write to a node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
85c97ca7a1 block: Pass BdrvChild to bdrv_aligned_preadv/pwritev and copy-on-read
This is where we want to check the permissions, so we need to have the
BdrvChild around where they are stored.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
2807c0cd43 tests: Remove FIXME comments
Not requesting any permissions is actually correct for these test cases
because no actual I/O or other operation covered by the permission
system is performed.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
8a7ce4f933 nbd/server: Use real permissions for NBD exports
NBD can't cope with device size changes, so resize must be forbidden,
but otherwise we can tolerate anything. Depending on whether the export
is writable or not, we only require consistent reads and writes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
6f5ef23a3f migration/block: Use real permissions
Request BLK_PERM_CONSISTENT_READ for the source of block migration, and
handle potential permission errors as good as we can in this place
(which is not very good, but it matches the other failure cases).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
887354bd13 hmp: Request permissions in qemu-io
The HMP command 'qemu-io' is a bit tricky because it wants to work on
the original BlockBackend, but additional permissions could be required.
The details are explained in a comment in the code, but in summary, just
request whatever permissions the current qemu-io command needs.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
0db832f42e commit: Add filter-node-name to block-commit
Management tools need to be able to know about every node in the graph
and need a way to address them. Changing the graph structure was okay
because libvirt doesn't really manage the node level yet, but future
libvirt versions need to deal with both new and old version of qemu.

This new option to blockdev-commit allows the client to set a node-name
for the automatically inserted filter driver, and at the same time
serves as a witness for a future libvirt that this version of qemu does
automatically insert a filter driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
6cdbceb12c mirror: Add filter-node-name to blockdev-mirror
Management tools need to be able to know about every node in the graph
and need a way to address them. Changing the graph structure was okay
because libvirt doesn't really manage the node level yet, but future
libvirt versions need to deal with both new and old version of qemu.

This new option to blockdev-mirror allows the client to set a node-name
for the automatically inserted filter driver, and at the same time
serves as a witness for a future libvirt that this version of qemu does
automatically insert a filter driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
a170a91fd3 stream: Use real permissions in streaming block job
The correct permissions are relatively obvious here (and explained in
code comments). For intermediate streaming, we need to reopen the top
node read-write before creating the job now because the permissions
system catches attempts to get the BLK_PERM_WRITE_UNCHANGED permission
on a read-only node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:47:50 +01:00
Kevin Wolf
4ef85a9c23 mirror: Use real permissions in mirror/active commit block job
The mirror block job is mainly used for two different scenarios:
Mirroring to an otherwise unused, independent target node, or for active
commit where the target node is part of the backing chain of the source.

Similarly to the commit block job patch, we need to insert a new filter
node to keep the permissions correct during active commit.

Note that one change this implies is that job->blk points to
mirror_top_bs as its root now, and mirror_top_bs (rather than the actual
source node) contains the bs->job pointer. This requires qemu-img commit
to get the job by name now rather than just taking bs->job.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:47:46 +01:00
Kevin Wolf
bbc02b90bc blockjob: Factor out block_job_remove_all_bdrv()
In some cases, we want to remove op blockers on intermediate nodes
before the whole block job transaction has completed (because they block
restoring the final graph state during completion). Provide a function
for this.

The whole block job lifecycle is a bit messed up and it's hard to
actually do all things in the right order, but I'll leave simplifying
this for another day.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
3e44c8e08a block: Allow backing file links in change_parent_backing_link()
Now that the backing file child role implements .attach/.detach
callbacks, nothing prevents us from modifying the graph even if that
involves changing backing file links.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
db95dbba3b block: BdrvChildRole.attach/detach() callbacks
Backing files are somewhat special compared to other kinds of children
because they are attached and detached using bdrv_set_backing_hd()
rather than the normal set of functions, which does a few more things
like setting backing blockers, toggling the BDRV_O_NO_BACKING flag,
setting parent_bs->backing_file, etc.

These special features are a reason why change_parent_backing_link()
can't handle backing files yet. With abstracting the additional features
into .attach/.detach callbacks, we get a step closer to a function that
can actually deal with this.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
dd65a52e4a block: Fix pending requests check in bdrv_append()
bdrv_append() cares about isolation of the node that it modifies, but
not about activity in some subtree below it. Instead of using the
recursive bdrv_requests_pending(), directly check bs->in_flight, which
considers only the node in question.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
4e9e4323d5 backup: Use real permissions in backup block job
The backup block job doesn't have very complicated requirements: It
needs to read from the source and write to the target, but it's fine
with either side being changed. The only restriction is that we can't
resize the image because the job uses a cached value.

qemu-iotests 055 needs to be changed because it used a target which was
already attached to a virtio-blk device. The permission system correctly
forbids this (virtio-blk can't accept another writer with its default
share-rw=off).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
d3f0675922 commit: Use real permissions for HMP 'commit'
This is a little simpler than the commit block job because it's
synchronous and only commits into the immediate backing file, but
otherwise doing more or less the same.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
8dfba27977 commit: Use real permissions in commit block job
This is probably one of the most interesting conversions to the new
op blocker system because a commit block job intentionally leaves some
intermediate block nodes in the backing chain that aren't valid on their
own any more; only the whole chain together results in a valid view.

In order to provide the 'consistent read' permission to the parents of
the 'top' node of the commit job, a new filter block driver is inserted
above 'top' which doesn't require 'consistent read' on its backing
chain. Subsequently, the commit job can block 'consistent read' on all
intermediate nodes without causing a conflict.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
76d554e20b blockjob: Add permissions to block_job_add_bdrv()
Block jobs don't actually do I/O through the the reference they create
with block_job_add_bdrv(), but they might want to use the permisssion
system to express what the block job does to intermediate nodes. This
adds permissions to block_job_add_bdrv() to provide the means to request
permissions.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
26de9438c1 block: Add BdrvChildRole.stay_at_node
When the parents' child links are updated in bdrv_append() or
bdrv_replace_in_backing_chain(), this should affect all child links of
BlockBackends or other nodes, but not on child links held for other
purposes (like for setting permissions). This patch allows to control
the behaviour per BdrvChildRole.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
d083319fe0 block: Include details on permission errors in message
Instead of just telling that there was some conflict, we can be specific
and tell which permissions were in conflict and which way the conflict
is.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
b541155587 block: Add BdrvChildRole.get_parent_desc()
For meaningful error messages in the permission system, we need to get
some human-readable description of the parent of a BdrvChild.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
c6cc12bfa7 blockjob: Add permissions to block_job_create()
This functions creates a BlockBackend internally, so the block jobs need
to tell it what they want to do with the BB.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:37 +01:00
Kevin Wolf
dabd18f64c hw/block: Introduce share-rw qdev property
By default, don't allow another writer for block devices that are
attached to a guest device. For the cases where this setup is intended
(e.g. using a cluster filesystem on the disk), the new option can be
used to allow it.

This change affects only devices using DEFINE_BLOCK_PROPERTIES().
Devices directly using DEFINE_PROP_DRIVE() still accept writers
unconditionally.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
a17c17a274 hw/block: Request permissions
This makes all device emulations with a qdev drive property request
permissions on their BlockBackend. The only thing we block at this point
is resizing images for some devices that can't support it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
39829a01ae block: Allow error return in BlockDevOps.change_media_cb()
Some devices allow a media change between read-only and read-write
media. They need to adapt the permissions in their .change_media_cb()
implementation, which can fail. So add an Error parameter to the
function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
c62d32f503 block: Request real permissions in blk_new_open()
We can figure out the necessary permissions from the flags that the
caller passed.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
55880601d8 block: Add BDRV_O_RESIZE for blk_new_open()
blk_new_open() is a convenience function that processes flags rather
than QDict options as a simple way to just open an image file.

In order to keep it convenient in the future, it must automatically
request the necessary permissions. This can easily be inferred from the
flags for read and write, but we need another flag that tells us whether
to get the resize permission.

We can't just always request it because that means that no block jobs
can run on the resulting BlockBackend (which is something that e.g.
qemu-img commit wants to do), but we also can't request it never because
most of the .bdrv_create() implementations call blk_truncate().

The solution is to introduce another flag that is passed by all users
that want to resize the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d7086422b1 block: Add error parameter to blk_insert_bs()
Now that blk_insert_bs() requests the BlockBackend permissions for the
node it attaches to, it can fail. Instead of aborting, pass the errors
to the callers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6d0eb64d5c block: Add permissions to blk_new()
We want every user to be specific about the permissions it needs, so
we'll pass the initial permissions as parameters to blk_new(). A user
only needs to call blk_set_perm() if it wants to change the permissions
after the fact.

The permissions are stored in the BlockBackend and applied whenever a
BlockDriverState should be attached in blk_insert_bs().

This does not include actually choosing the right set of permissions
everywhere yet. Instead, the usual FIXME comment is added to each place
and will be addressed in individual patches.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
981776b348 block: Add permissions to BlockBackend
The BlockBackend can now store the permissions that its user requires.
This is necessary because nodes can be ejected from or inserted into a
BlockBackend and all of these operations must make sure that the user
still gets what it requested initially.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
f68c598be6 block: Request real permissions in bdrv_attach_child()
Now that all block drivers with children tell us what permissions they
need from each of their children, bdrv_attach_child() can use this
information and make the right requirements while trying to attach new
children.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
78e421c9fb block: Require .bdrv_child_perm() with child nodes
All block drivers that can have child nodes implement .bdrv_child_perm()
now. Make this officially a requirement by asserting that only drivers
without children can omit .bdrv_child_perm().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
91ef38257a vvfat: Implement .bdrv_child_perm()
vvfat is the last remaining driver that can have children, but doesn't
implement .bdrv_child_perm() yet. The default handlers aren't suitable
here, so let's implement a very simple driver-specific one that protects
the internal child from being used by other users as good as our
permissions permit.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
862f215fab block: Request child permissions in format drivers
This makes use of the .bdrv_child_perm() implementation for formats that
we just added. All format drivers expose the permissions they actually
need nows, so that they can be set accordingly and updated when parents
are attached or detached.

The only format not included here is raw, which was already converted
with the other filter drivers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6b1a044afb block: Default .bdrv_child_perm() for format drivers
Almost all format drivers have the same characteristics as far as
permissions are concerned: They have one or more children for storing
their own data and, more importantly, metadata (can be written to and
grow even without external write requests, must be protected against
other writers and present consistent data) and optionally a backing file
(this is just data, so like for a filter, it only depends on what the
parent nodes need).

This provides a default implementation that can be shared by most of
our format drivers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d7010dfb68 block: Request child permissions in filter drivers
All callers will have to request permissions for all of their child
nodes. Block drivers that act as simply filters can use the default
implementation of .bdrv_child_perm().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
6a1b9ee152 block: Default .bdrv_child_perm() for filter drivers
Most filters need permissions related to read and write for their
children, but only if the node has a parent that wants to use the same
operation on the filter. The same is true for resize.

This adds a default implementation that simply forwards all necessary
permissions to all children of the node and leaves the other permissions
unchanged.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
33a610c398 block: Involve block drivers in permission granting
In many cases, the required permissions of one node on its children
depend on what its parents require from it. For example, the raw format
or most filter drivers only need to request consistent reads if that's
something that one of their parents wants.

In order to achieve this, this patch introduces two new BlockDriver
callbacks. The first one lets drivers first check (recursively) whether
the requested permissions can be set; the second one actually sets the
new permission bitmask.

Also add helper functions that drivers can use in their implementation
of the callbacks to update their permissions on a specific child.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
d5e6f437c5 block: Let callers request permissions when attaching a child node
When attaching a node as a child to a new parent, the required and
shared permissions for this parent are checked against all other parents
of the node now, and an error is returned if there is a conflict.

This allows error returns to a function that previously always
succeeded, and the same is true for quite a few callers and their
callers. Converting all of them within the same patch would be too much,
so for now everyone tells that they don't need any permissions and allow
everyone else to do anything. This way we can use &error_abort initially
and convert caller by caller to pass actual permission requirements and
implement error handling.

All these places are marked with FIXME comments and it will be the job
of the next patches to clean them up again.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:36 +01:00
Kevin Wolf
8b2ff5291f block: Add Error argument to bdrv_attach_child()
It will have to return an error soon, so prepare the callers for it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:35 +01:00
Kevin Wolf
7006c9a761 block: Add op blocker permission constants
This patch defines the permission categories that will be used by the
new op blocker system.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:40:35 +01:00
Markus Armbruster
9e19ad4e49 option: Tweak invalid size error message and unbreak iotest 049
Commit 75cdcd1 neglected to update tests/qemu-iotests/049.out, and
made the error message for negative size worse.  Fix that.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-28 20:40:31 +01:00
Peter Lieven
2d9187bc65 qemu-img: make convert async
the convert process is currently completely implemented with sync operations.
That means it reads one buffer and then writes it. No parallelism and each sync
request takes as long as it takes until it is completed.

This can be a big performance hit when the convert process reads and writes
to devices which do not benefit from kernel readahead or pagecache.
In our environment we heavily have the following two use cases when using
qemu-img convert.

a) reading from NFS and writing to iSCSI for deploying templates
b) reading from iSCSI and writing to NFS for backups

In both processes we use libiscsi and libnfs so we have no kernel cache.

This patch changes the convert process to work with parallel running coroutines
which can significantly improve performance for network storage devices:

qemu-img (master)
 nfs -> iscsi 22.8 secs
 nfs -> ram   11.7 secs
 ram -> iscsi 12.3 secs

qemu-img-async (8 coroutines, in-order write disabled)
 nfs -> iscsi 11.0 secs
 nfs -> ram   10.4 secs
 ram -> iscsi  9.0 secs

This patches introduces 2 new cmdline parameters. The -m parameter to specify
the number of coroutines running in parallel (defaults to 8). And the -W parameter to
allow qemu-img to write to the target out of order rather than sequential. This improves
performance as the writes do not have to wait for each other to complete.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-28 20:40:31 +01:00
Marc-André Lureau
e7c83a885f vhost-user: delay vhost_user_stop
Since commit b0a335e351, a socket write
may trigger a disconnect events, calling vhost_user_stop() and clearing
all the vhost_dev strutures holding data that vhost.c functions expect
to remain valid. Delay the cleanup to keep the vhost_dev structure
valid during the vhost.c functions.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170227104956.24729-1-marcandre.lureau@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 19:11:15 +00:00
Zhang Chen
daa33c5215 Add a new qmp command to do checkpoint, query xen replication status
We can call this qmp command to do checkpoint outside of qemu.
Xen colo will need this function.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wencongyang@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-02-28 11:02:12 -08:00
Zhang Chen
2c9639ecab Add a new qmp command to start/stop replication
We can call this qmp command to start/stop replication outside of qemu.
Like Xen colo need this function.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wencongyang@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2017-02-28 11:01:56 -08:00
Peter Maydell
9514f2648c Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging
This pull request brings:
- a fix to a minor bug reported by Coverity
- throttling support in the local backend (command line only)

# gpg: Signature made Tue 28 Feb 2017 09:32:30 GMT
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/for-upstream:
  throttle: factor out duplicate code
  fsdev: add IO throttle support to fsdev devices
  9pfs: fix v9fs_lock error case

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:39:49 +00:00
Clement Deschamps
1eeb5c7dea bcm2835: add sdhost and gpio controllers
This adds the bcm2835_sdhost and bcm2835_gpio to the BCM2835 platform.

For supporting the SD controller selection (alternate function of GPIOs
48-53), the bcm2835_gpio now exposes an sdbus.
It also has a link to both the sdbus of sdhci and sdhost controllers,
and the card is reparented from one bus to another when the alternate
function of GPIOs 48-53 is modified.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-5-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-5-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Clement Deschamps
d72fc9dcb1 bcm2835_gpio: add bcm2835 gpio controller
This adds the BCM2835 GPIO controller.

It currently implements:
- The 54 GPIOs as outputs (qemu_irq)
- The SD controller selection via alternate function of GPIOs 48-53

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-4-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-4-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Clement Deschamps
97fb87cc5d hw/sd: add card-reparenting function
Provide a new function sdbus_reparent_card() in sd core for reparenting
a card from a SDBus to another one.

This function is required by the raspi platform, where the two SD
controllers can be dynamically switched.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1488293711-14195-3-git-send-email-peter.maydell@linaro.org
Message-id: 20170224164021.9066-3-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: added a doc comment to the header file; changed to
 use new behaviour of qdev_set_parent_bus()]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Peter Maydell
91c968ac72 qdev: Have qdev_set_parent_bus() handle devices already on a bus
Instead of qdev_set_parent_bus() silently doing the wrong
thing if it's handed a device that's already on a bus,
have it remove the device from the old bus and add it to
the new one. This is useful for the raspi2 sdcard.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 1488293711-14195-2-git-send-email-peter.maydell@linaro.org
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
07a5628cb8 hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers
Reset CPU interface registers of GICv3 when CPU is reset.
For this, ARMCPRegInfo struct is registered with one ICC
register whose resetfn is called when cpu is reset.

All the ICC registers are reset under one single register
reset function instead of calling resetfn for each ICC
register.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-6-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
d3a3e52962 target-arm: Add GICv3CPUState in CPUARMState struct
Add gicv3state void pointer to CPUARMState struct
to store GICv3CPUState.

In case of usecase like CPU reset, we need to reset
GICv3CPUState of the CPU. In such scenario, this pointer
becomes handy.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-5-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
367b9f527b hw/intc/arm_gicv3_kvm: Implement get/put functions
This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Message-id: 1487850673-26455-4-git-send-email-vijay.kilari@gmail.com
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * Add ICC_SRE_EL1 save and restore
 * Dropped translate_fn mechanism and coded functions to handle
   save and restore of edge_trigger and priority
 * Number of APnR register saved/restored based on number of
   priority bits supported]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Vijaya Kumar K
6692aac411 hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
To Save and Restore ICC_SRE_EL1 register introduce vmstate
subsection and load only if non-zero.
Also initialize icc_sre_el1 with to 0x7 in pre_load
function.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487850673-26455-3-git-send-email-vijay.kilari@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:10:00 +00:00
Jeff Cody
8a47e8eb59 block/rbd: add blockdev-add support
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-28 11:32:40 -05:00
Jeff Cody
c7cacb3e7a block/rbd: parse all options via bdrv_parse_filename
Get rid of qemu_rbd_parsename in favor of bdrv_parse_filename.
This simplifies a lot of the parsing as well, as we can treat everything
a bit simpler since nonexistent options are simply NULL pointers instead
of empty strings.

An important item to note:

Ceph has many extra option values that can be specified as key/value
pairs.  This was handled previously in the driver by extracting the
values that the QEMU driver cared about, and then blindly passing all
extra options to rbd after splitting them into key/value pairs, and
cleaning up any special character escaping.

The practice is continued in this patch; there is an option
"keyvalue-pairs" that is populated with all the key/value pairs that the
QEMU driver does not care about.  These key/value pairs will override
any settings in the 'conf' configuration file, just as they did before.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-28 11:32:31 -05:00
Jeff Cody
0f9d252de4 block/rbd: add all the currently supported runtime_opts
This adds all the currently supported runtime opts, which
are the options as parsed from the filename.  All of these
options are explicitly checked for during during runtime,
with an exception to the "keyvalue-pairs" option.

This option contains all the key/value pairs that the QEMU rbd
driver merely unescapes, and passes along blindly to rados.  This
option is a "legacy" option, and will not be exposed in the QAPI
or available for introspection.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-28 11:32:25 -05:00
Jeff Cody
7830f90998 block/rbd: don't copy strings in qemu_rbd_next_tok()
This patch is prep work for parsing options for .bdrv_parse_filename,
and using QDict options.

The function qemu_rbd_next_tok() searched for various key/value pairs,
and copied them into buffers.  This will soon be an unnecessary extra
step, so we will now return found strings by reference only, and
offload the responsibility for safely handling/coping these strings to
the caller.

This also cleans up error handling some, as the callers now rely on
the Error object to determine if there is a parse error.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-28 11:32:06 -05:00
Peter Maydell
7d1730b7d9 Merge remote-tracking branch 'remotes/mjt/tags/trivial-patches-fetch' into staging
trivial patches for 2017-02-28

# gpg: Signature made Tue 28 Feb 2017 06:43:55 GMT
# gpg:                using RSA key 0x701B4F6B1A693E59
# gpg: Good signature from "Michael Tokarev <mjt@tls.msk.ru>"
# gpg:                 aka "Michael Tokarev <mjt@corpit.ru>"
# gpg:                 aka "Michael Tokarev <mjt@debian.org>"
# Primary key fingerprint: 6EE1 95D1 886E 8FFB 810D  4324 457C E0A0 8044 65C5
#      Subkey fingerprint: 7B73 BAD6 8BE7 A2C2 8931  4B22 701B 4F6B 1A69 3E59

* remotes/mjt/tags/trivial-patches-fetch:
  syscall: fixed mincore(2) not failing with ENOMEM
  hw/acpi/tco.c: fix tco timer stop
  lm32: milkymist-tmu2: fix a third integer overflow
  qemu-options.hx: add missing id=chr0 chardev argument in vhost-user example
  Update copyright year
  tests/prom-env: Enable the test for the sun4u machine, too
  cadence_gem: Remove unused parameter debug message
  register: fix incorrect read mask
  ide: remove undefined behavior in ide-test
  CODING_STYLE: Mention preferred comment form
  hw/core/register: Mark the device with cannot_instantiate_with_device_add_yet
  hw/core/or-irq: Mark the device with cannot_instantiate_with_device_add_yet
  softfloat: Use correct type in float64_to_uint64_round_to_zero()
  target/s390x: Fix typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:22:41 +00:00
Paolo Bonzini
3a5eb5b4a9 update Linux headers to 4.11
virtio_mmio.h would be deleted; I am leaving it in though it was a
mistake to add it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:18:49 +00:00
Paolo Bonzini
f717e6245f update-linux-headers: update for 4.11
The linux-headers/asm-arm/unistd.h file has been split in three
sub-files, copy them along.  However, building them requires
setting ARCH rather than SRCARCH.

SRCARCH defaults to $(ARCH) anyway; to avoid future occurrence of
the same problem use ARCH for all architectures where SRCARCH=ARCH.
Currently these are all except x86, sparc, sh and tile.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170221122920.16245-2-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:18:49 +00:00
Peter Maydell
8a85e0654e stm32f205: Rename 'nvic' local to 'armv7m'
The local variable 'nvic' in stm32f205_soc_realize() no longer
holds a direct pointer to the NVIC device; it is a pointer to
the ARMv7M container object. Rename it 'armv7m' accordingly.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-12-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
b72e2f6856 stm32f205: Create armv7m object without using armv7m_init()
Switch the stm32f205 SoC to create the armv7m object directly
rather than via the armv7m_init() wrapper. This fits better
with the SoC model's very QOMified design.

In particular this means we can push loading the guest image
out to the top level board code where it belongs, rather
than the SoC object having a QOM property for the filename
to load.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-11-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
ff68dacbc7 armv7m: Split systick out from NVIC
The SysTick timer isn't really part of the NVIC proper;
we just modelled it that way back when we couldn't
easily have devices that only occupied a small chunk
of a memory region. Split it out into its own device.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1487604965-23220-10-git-send-email-peter.maydell@linaro.org
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 16:18:49 +00:00
Peter Maydell
743eb70560 armv7m: Don't put core v7M devices under CONFIG_STELLARIS
The NVIC is a core v7M device that exists for all v7M CPUs;
put it under a CONFIG_ARM_V7M rather than hiding it under
CONFIG_STELLARIS.

(We'll use CONFIG_ARM_V7M for the SysTick device too
when we split it out of the NVIC.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-9-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
f68d881c9b armv7m: Make bitband device take the address space to access
Instead of the bitband device doing a cpu_physical_memory_read/write,
make it take a MemoryRegion which specifies where it should be
accessing, and use address_space_read/write to access the
corresponding AddressSpace.

Since this entails pretty much a rewrite, convert away from
old_mmio in the process.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-8-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
98957a94ef armv7m: Make NVIC expose a memory region rather than mapping itself
Make the NVIC device expose a memory region for its users
to map, rather than mapping itself into the system memory
space on realize, and get the one user (the ARMv7M object)
to do this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-7-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
618119c2d3 armv7m: Make ARMv7M object take memory region link
Make the ARMv7M object take a memory region link which it uses
to wire up the bitband rather than having them always put
themselves in the system address space.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-6-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
21e0c38fe2 armv7m: Use QOMified armv7m object in armv7m_init()
Make the legacy armv7m_init() function use the newly QOMified
armv7m object rather than doing everything by hand.

We can return the armv7m object rather than the NVIC from
armv7m_init() because its interface to the rest of the
board (GPIOs, etc) is identical.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-5-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
56b7c66f49 armv7m: QOMify the armv7m container
Create a proper QOM object for the armv7m container, which
holds the CPU, the NVIC and the bitband regions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-4-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
6bf436cf9d armv7m: Move NVICState struct definition into header
Move the NVICState struct definition into a header, so we can
embed it into other QOM objects like SoCs.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-3-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
3651c28569 armv7m: Abstract out the "load kernel" code
Abstract the "load kernel" code out of armv7m_init() into its own
function.  This includes the registration of the CPU reset function,
to parallel how we handle this for A profile cores.

We make the function public so that boards which choose to
directly instantiate an ARMv7M device object can call it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1487604965-23220-2-git-send-email-peter.maydell@linaro.org
2017-02-28 16:18:49 +00:00
Peter Maydell
1bbe5dc66b Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170228' into staging
target-arm queue:
 * raspi2: implement RNG module
 * raspi2: implement new SD card controller (but don't wire it up)
 * sdhci: bugfixes for block transfers
 * virt: fix cpu object reference leak
 * Add missing fp_access_check() to aarch64 crypto instructions
 * cputlb: Don't assume do_unassigned_access() never returns
 * virt: Add a user option to disallow ITS instantiation
 * i.MX timers: fix reset handling
 * ARMv7M NVIC: rewrite to fix broken priority handling and masking
 * exynos: Fix proper mapping of CPUs by providing real cluster ID
 * exynos: Fix Linux kernel division by zero for PLLs

# gpg: Signature made Tue 28 Feb 2017 12:40:51 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170228: (27 commits)
  hw/arm/exynos: Fix proper mapping of CPUs by providing real cluster ID
  hw/arm/exynos: Fix Linux kernel division by zero for PLLs
  bcm2835_sdhost: add bcm2835 sdhost controller
  armv7m: Allow SHCSR writes to change pending and active bits
  armv7m: Raise correct kind of UsageFault for attempts to execute ARM code
  armv7m: Check exception return consistency
  armv7m: Extract "exception taken" code into functions
  armv7m: VECTCLRACTIVE and VECTRESET are UNPREDICTABLE
  armv7m: Simpler and faster exception start
  armv7m: Remove unused armv7m_nvic_acknowledge_irq() return value
  armv7m: Escalate exceptions to HardFault if necessary
  arm: gic: Remove references to NVIC
  armv7m: Fix condition check for taking exceptions
  armv7m: Rewrite NVIC to not use any GIC code
  armv7m: Implement reading and writing of PRIGROUP
  armv7m: Rename nvic_state to NVICState
  ARM i.MX timers: fix reset handling
  hw/arm/virt: Add a user option to disallow ITS instantiation
  cputlb: Don't assume do_unassigned_access() never returns
  Add missing fp_access_check() to aarch64 crypto instructions
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 14:50:17 +00:00
Peter Maydell
c8c0a1a784 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 28 Feb 2017 04:34:34 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  iscsi: add missing colons to the qapi docs
  block/mirror: fix broken sparseness detection

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 13:41:03 +00:00
Peter Maydell
a57aaa4e74 Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20170228' into staging
Enable MTTCG for Alpha guest

# gpg: Signature made Tue 28 Feb 2017 00:43:17 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-axp-20170228:
  target/alpha: Enable MTTCG by default

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 13:01:50 +00:00
Alex Bennée
1ed9251515 .shippable: add s390x-cross target
Use the new debian-s390x-cross.docker target to cross compile for
s390.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20170227143028.16428-3-alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:31:01 +08:00
Alex Bennée
267004d991 new: dockerfiles/debian-s390-cross
This adds an s390 cross build target to our library of docker setups.
There is an issue with the xfslibs-dev:s390x package having a clash so
we do a || apt-get -f install to fixup the rest of the dependencies.

This doesn't build on the debian.docker file as we are using the
multilib compiler which is only available in stretch (the current
testing repo).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
CC: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20170227143028.16428-2-alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-28 20:31:01 +08:00
Krzysztof Kozlowski
f3a6339a5b hw/arm/exynos: Fix proper mapping of CPUs by providing real cluster ID
The Exynos4210 has cluster ID 0x9 in its MPIDR register (raw value
0x8000090x).  If this cluster ID is not provided, then Linux kernel
cannot map DeviceTree nodes to MPIDR values resulting in kernel
warning and lack of any secondary CPUs:

    DT missing boot CPU MPIDR[23:0], fall back to default cpu_logical_map
    ...
    smp: Bringing up secondary CPUs ...
    smp: Brought up 1 node, 1 CPU
    SMP: Total of 1 processors activated (24.00 BogoMIPS).

Provide a cluster ID so Linux will see proper MPIDR and will try to
bring the secondary CPU online.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Message-id: 20170226200142.31169-2-krzk@kernel.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:20 +00:00
Krzysztof Kozlowski
1e0228fd20 hw/arm/exynos: Fix Linux kernel division by zero for PLLs
Without any clock controller, the Linux kernel was hitting division by
zero during boot or with clk_summary:
[    0.000000] [<c031054c>] (unwind_backtrace) from [<c030ba6c>] (show_stack+0x10/0x14)
[    0.000000] [<c030ba6c>] (show_stack) from [<c05b2660>] (dump_stack+0x88/0x9c)
[    0.000000] [<c05b2660>] (dump_stack) from [<c05b11a4>] (Ldiv0+0x8/0x10)
[    0.000000] [<c05b11a4>] (Ldiv0) from [<c06ad1e0>] (samsung_pll45xx_recalc_rate+0x58/0x74)
[    0.000000] [<c06ad1e0>] (samsung_pll45xx_recalc_rate) from [<c0692ec0>] (clk_register+0x39c/0x63c)
[    0.000000] [<c0692ec0>] (clk_register) from [<c125d360>] (samsung_clk_register_pll+0x2e0/0x3d4)
[    0.000000] [<c125d360>] (samsung_clk_register_pll) from [<c125d7e8>] (exynos4_clk_init+0x1b0/0x5e4)
[    0.000000] [<c125d7e8>] (exynos4_clk_init) from [<c12335f4>] (of_clk_init+0x17c/0x210)
[    0.000000] [<c12335f4>] (of_clk_init) from [<c1204700>] (time_init+0x24/0x2c)
[    0.000000] [<c1204700>] (time_init) from [<c1200b2c>] (start_kernel+0x24c/0x38c)
[    0.000000] [<c1200b2c>] (start_kernel) from [<4020807c>] (0x4020807c)

Provide stub for clock controller returning reset values for PLLs.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Message-id: 20170226200142.31169-1-krzk@kernel.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:20 +00:00
Clement Deschamps
43ddc182e2 bcm2835_sdhost: add bcm2835 sdhost controller
This adds the BCM2835 SDHost controller from Arasan.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Message-id: 20170224164021.9066-2-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
5db53e353d armv7m: Allow SHCSR writes to change pending and active bits
Implement the NVIC SHCSR write behaviour which allows pending and
active status of some exceptions to be changed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
e13886e3a7 armv7m: Raise correct kind of UsageFault for attempts to execute ARM code
M profile doesn't implement ARM, and the architecturally required
behaviour for attempts to execute with the Thumb bit clear is to
generate a UsageFault with the CFSR INVSTATE bit set.  We were
incorrectly implementing this as generating an UNDEFINSTR UsageFault;
fix this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
aa488fe3bb armv7m: Check exception return consistency
Implement the exception return consistency checks
described in the v7M pseudocode ExceptionReturn().

Inspired by a patch from Michael Davidsaver's series, but
this is a reimplementation from scratch based on the
ARM ARM pseudocode.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
39ae2474e3 armv7m: Extract "exception taken" code into functions
Extract the code from the tail end of arm_v7m_do_interrupt() which
enters the exception handler into a pair of utility functions
v7m_exception_taken() and v7m_push_stack(), which correspond roughly
to the pseudocode PushStack() and ExceptionTaken().

This also requires us to move the arm_v7m_load_vector() utility
routine up so we can call it.

Handling illegal exception returns has some cases where we want to
take a UsageFault either on an existing stack frame or with a new
stack frame but with a specific LR value, so we want to be able to
call these without having to go via arm_v7m_cpu_do_interrupt().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
14790f730a armv7m: VECTCLRACTIVE and VECTRESET are UNPREDICTABLE
The VECTCLRACTIVE and VECTRESET bits in the AIRCR are both
documented as UNPREDICTABLE if you write a 1 to them when
the processor is not halted in Debug state (ie stopped
and under the control of an external JTAG debugger).
Since we don't implement Debug state or emulated JTAG
these bits are always UNPREDICTABLE for us. Instead of
logging them as unimplemented we can simply log writes
as guest errors and ignore them.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: change extracted from another patch; commit message
 constructed from scratch]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
a25dc805e2 armv7m: Simpler and faster exception start
All the places in armv7m_cpu_do_interrupt() which pend an
exception in the NVIC are doing so for synchronous
exceptions. We know that we will always take some
exception in this case, so we can just acknowledge it
immediately, rather than returning and then immediately
being called again because the NVIC has raised its outbound
IRQ line.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: tweaked commit message; added DEBUG to the set of
exceptions we handle immediately, since it is synchronous
when it results from the BKPT instruction]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Peter Maydell
a5d8235545 armv7m: Remove unused armv7m_nvic_acknowledge_irq() return value
Having armv7m_nvic_acknowledge_irq() return the new value of
env->v7m.exception and its one caller assign the return value
back to env->v7m.exception is pointless. Just make the return
type void instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
a73c98e159 armv7m: Escalate exceptions to HardFault if necessary
The v7M exception architecture requires that if a synchronous
exception cannot be taken immediately (because it is disabled
or at too low a priority) then it should be escalated to
HardFault (and the HardFault exception is then taken).
Implement this escalation logic.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: extracted from another patch]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Michael Davidsaver
7c14b3ac07 arm: gic: Remove references to NVIC
Now that the NVIC is its own separate implementation, we can
clean up the GIC code by removing REV_NVIC and conditionals
which use it.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Peter Maydell
7ecdaa4a96 armv7m: Fix condition check for taking exceptions
The M profile condition for when we can take a pending exception or
interrupt is not the same as that for A/R profile.  The code
originally copied from the A/R profile version of the
cpu_exec_interrupt function only worked by chance for the
very simple case of exceptions being masked by PRIMASK.
Replace it with a call to a function in the NVIC code that
correctly compares the priority of the pending exception
against the current execution priority of the CPU.

[Michael Davidsaver's patchset had a patch to do something
similar but the implementation ended up being a rewrite.]

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Michael Davidsaver
da6d674e50 armv7m: Rewrite NVIC to not use any GIC code
Despite some superficial similarities of register layout, the
M-profile NVIC is really very different from the A-profile GIC.
Our current attempt to reuse the GIC code means that we have
significant bugs in our NVIC.

Implement the NVIC as an entirely separate device, to give
us somewhere we can get the behaviour correct.

This initial commit does not attempt to implement exception
priority escalation, since the GIC-based code didn't either.
It does fix a few bugs in passing:
 * ICSR.RETTOBASE polarity was wrong and didn't account for
   internal exceptions
 * ICSR.VECTPENDING was 16 too high if the pending exception
   was for an external interrupt
 * UsageFault, BusFault and MemFault were not disabled on reset
   as they are supposed to be

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: reworked, various bugs and stylistic cleanups]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Peter Maydell
1004102a77 armv7m: Implement reading and writing of PRIGROUP
Add a state field for the v7M PRIGROUP register and implent
reading and writing it. The current NVIC doesn't honour
the values written, but the new version will.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:16 +00:00
Peter Maydell
f797c07507 armv7m: Rename nvic_state to NVICState
Rename the nvic_state struct to NVICState, to match
our naming conventions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:16 +00:00
Kurban Mallachiev
c98c9eba88 ARM i.MX timers: fix reset handling
The i.MX timer device can be reset by writing to the SWR bit
of the CR register. This has to behave differently from hard
(power-on) reset because it does not reset all of the bits
in the CR register.

We were incorrectly implementing soft reset and hard reset
the same way, and in addition had a logic error which meant
that we were clearing the bits that soft-reset is supposed
to preserve and not touching the bits that soft-reset clears.
This was not correct behaviour for either kind of reset.

Separate out the soft reset and hard reset code paths, and
correct the handling of reset of the CR register so that it
is correct in both cases.

Signed-off-by: Kurban Mallachiev <mallachiev@ispras.ru>
[PMM: rephrased commit message, spacing on operators;
 use bool rather than int for is_soft_reset]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:16 +00:00
Eric Auger
ccc11b0279 hw/arm/virt: Add a user option to disallow ITS instantiation
In 2.9 ITS will block save/restore and migration use cases. As such,
let's introduce a user option that allows to turn its instantiation
off, along with GICv3. With the "its" option turned false, migration
will be possible, obviously at the expense of MSI support (with GICv3).

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487681108-14452-1-git-send-email-eric.auger@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:16 +00:00
Peter Maydell
44d7ce0ef3 cputlb: Don't assume do_unassigned_access() never returns
In get_page_addr_code(), if the guest PC doesn't correspond to RAM
then we currently run the CPU's do_unassigned_access() hook if it has
one, and otherwise we give up and exit QEMU with a more-or-less
useful message.  This code assumes that the do_unassigned_access hook
will never return, because if it does then we'll plough on attempting
to use a non-RAM TLB entry to get a RAM address and will abort() in
qemu_ram_addr_from_host_nofail().  Unfortunately some CPU
implementations of this hook do return: Microblaze, SPARC and the ARM
v7M.

Change the code to call report_bad_exec() if the hook returns, as
well as if it didn't have one.  This means we can tidy it up to use
the cpu_unassigned_access() function which wraps the "get the CPU
class and call the hook if it has one" work, since we aren't trying
to distinguish "no hook" from "hook existed and returned" any more.

This brings the handling of this hook into line with the handling
used for data accesses, where "hook returned" is treated the
same as "no hook existed" and gets you the default behaviour.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-28 12:08:15 +00:00
Nick Reilly
a4f5c5b723 Add missing fp_access_check() to aarch64 crypto instructions
The aarch64 crypto instructions for AES and SHA are missing the
check for if the FPU is enabled.

Signed-off-by: Nick Reilly <nreilly@blackberry.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Igor Mammedov
dbb74759fa hw/arm/virt: fix cpu object reference leak
object_new(FOO) returns an object with ref_cnt == 1
and following
  object_property_set_bool(cpuobj, true, "realized", NULL)
set parent of cpuobj to '/machine/unattached' which makes
ref_cnt == 2.

Since machvirt_init() doesn't take ownership of cpuobj
returned by object_new() it should explicitly drop
reference to cpuobj when dangling pointer is about to
go out of scope like it's done pc_new_cpu() to avoid
object leak.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 1487253461-269218-1-git-send-email-imammedo@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Prasad J Pandit
241999bf4c sd: sdhci: Remove block count enable check in single block transfers
In SDHCI protocol, the 'Block count enable' bit of the Transfer
Mode register is relevant only in multi block transfers. We need
not check it in single block transfers.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-5-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Prasad J Pandit
45ba9f761b sd: sdhci: conditionally invoke multi block transfer
In sdhci_write invoke multi block transfer if it is enabled
in the transfer mode register 's->trnmod'.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-4-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Prasad J Pandit
6e86d90352 sd: sdhci: check transfer mode register in multi block transfer
In the SDHCI protocol, the transfer mode register value
is used during multi block transfer to check if block count
register is enabled and should be updated. Transfer mode
register could be set such that, block count register would
not be updated, thus leading to an infinite loop. Add check
to avoid it.

Reported-by: Wjjzhang <wjjzhang@tencent.com>
Reported-by: Jiang Xin <jiangxin1@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-3-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Prasad J Pandit
8b20aefac4 sd: sdhci: mask transfer mode register value
In SDHCI protocol, the transfer mode register is defined
to be of 6 bits. Mask its value with '0x0037' so that an
invalid value could not be assigned.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 20170214185225.7994-2-ppandit@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Peter Maydell
373442ea3a bcm2835_rng: Use qcrypto_random_bytes() rather than rand()
Switch to using qcrypto_random_bytes() rather than rand() as
our source of randomness for the BCM2835 RNG.

If qcrypto_random_bytes() fails, we don't want to return the guest a
non-random value in case they're really using it for cryptographic
purposes, so the best we can do is a fatal error.  This shouldn't
happen unless something's broken, though.

In theory we could implement this device's full FIFO and interrupt
semantics and then just stop filling the FIFO.  That's a lot of work,
though, and doesn't really give a very nice diagnostic to the user
since the guest will just seem to hang.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-28 12:08:14 +00:00
Marcin Chojnacki
54a5ba13a9 target-arm: Implement BCM2835 hardware RNG
Recent vanilla Raspberry Pi kernels started to make use of
the hardware random number generator in BCM2835 SoC. As a
result, those kernels wouldn't work anymore under QEMU
but rather just freeze during the boot process.

This patch implements a trivial BCM2835 compatible RNG,
and adds it as a peripheral to BCM2835 platform, which
allows to boot a vanilla Raspberry Pi kernel under Qemu.

Changes since v1:
 * Prevented guest from writing [31..20] bits in rng_status
 * Removed redundant minimum_version_id_old
 * Added field entries for the state
 * Changed realize function to reset

Signed-off-by: Marcin Chojnacki <marcinch7@gmail.com>
Message-id: 20170210210857.47893-1-marcinch7@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:13 +00:00
Peter Maydell
105d86ff38 Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-upstream-pull-request' into staging
# gpg: Signature made Mon 27 Feb 2017 22:15:47 GMT
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier2/tags/linux-user-for-upstream-pull-request:
  syscall: fixed mincore(2) not failing with ENOMEM
  linux-user: fix do_rt_sigreturn on m68k linux userspace emulation
  linux-user: correctly manage SR in ucontext
  linux-user: Add signal handling support for x86_64
  linux-user: Add sockopts for IPv6 ping and IPv6 traceroute
  linux-user: fix fork()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:03:36 +00:00
Daniel P. Berrange
07e95cd529 io: fully parse & validate HTTP headers for websocket protocol handshake
The current websockets protocol handshake code is very relaxed, just
doing crude string searching across the HTTP header data. This causes
it to both reject valid connections and fail to reject invalid
connections. For example, according to the RFC 6455 it:

 - MUST reject any method other than "GET"
 - MUST reject any HTTP version less than "HTTP/1.1"
 - MUST reject Connection header without "Upgrade" listed
 - MUST reject Upgrade header which is not 'websocket'
 - MUST reject missing Host header
 - MUST treat HTTP header names as case insensitive

To do all this validation correctly requires that we fully parse the
HTTP headers, populating a data structure containing the header
fields.

After this change, we also reject any path other than '/'

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-28 11:51:16 +00:00
Dr. David Alan Gilbert
665414ad06 postcopy: Add extra check for COPY function
As an extra sanity check, make sure the region we're registering
can perform UFFDIO_COPY;  the COPY will fail later but this
gives a cleaner failure.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-17-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
0c1f4036db postcopy: Add doc about hugepages and postcopy
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-16-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
7e8cafb713 postcopy: Check for userfault+hugepage feature
We need extra Linux kernel support (~4.11) to support userfaults
on hugetlbfs; check for them.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-15-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
61a502128b postcopy: Update userfaultfd.h header
Just the userfaultfd.h update from Paolo's header
update run;

* Drop this patch after Paolo's update goes in *

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170224182844.32452-14-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
433bd0223c postcopy: Allow hugepages
Allow huge pages in postcopy.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-13-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
4c011c37ec postcopy: Send whole huge pages
The RAM save code uses ram_save_host_page to send whole
host pages at a time;  change this to use the host page size associated
with the RAM Block which may be a huge page.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-12-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
332847f075 postcopy: Mask fault addresses to huge page boundary
Currently the fault address received by userfault is rounded to
the host page boundary and a host page is requested from the source.
Use the current RAMBlock page size instead of the general host page
size so that for RAMBlocks backed by huge pages we request the whole
huge page.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-11-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:24 +00:00
Dr. David Alan Gilbert
28abd20014 postcopy: Load huge pages in one go
The existing postcopy RAM load loop already ensures that it
glues together whole host-pages from the target page size chunks sent
over the wire.  Modify the definition of host page that it uses
to be the RAM block page size and thus be huge pages where appropriate.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-10-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
41d84210d4 postcopy: Use temporary for placing zero huge pages
The kernel can't do UFFDIO_ZEROPAGE for huge pages, so we have
to allocate a temporary (always zero) page and use UFFDIO_COPYPAGE
on it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-9-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
df9ff5e1e3 postcopy: Plumb pagesize down into place helpers
Now we deal with normal size pages and huge pages we need
to tell the place handlers the size we're dealing with
and make sure the temporary page is large enough.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-8-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
67f11b5c23 postcopy: Record largest page size
Record the largest page size in use; we'll need it soon for allocating
temporary buffers.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-7-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
e2fa71f527 postcopy: enhance ram_block_discard_range for hugepages
Unfortunately madvise DONTNEED doesn't work on hugepagetlb
so use fallocate(FALLOC_FL_PUNCH_HOLE)
qemu_fd_getpagesize only sets the page based off a file
if the file is from hugetlbfs.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-6-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
d3a5038c46 exec: ram_block_discard_range
Create ram_block_discard_range in exec.c to replace
postcopy_ram_discard_range and most of ram_discard_range.

Those two routines are a bit of a weird combination, and
ram_discard_range is about to get more complex for hugepages.
It's OS dependent code (so shouldn't be in migration/ram.c) but
it needs quite a bit of the innards of RAMBlock so doesn't belong in
the os*.c.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-5-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
29c5917201 postcopy: Chunk discards for hugepages
At the start of the postcopy phase, partially sent huge pages
must be discarded.  The code for dealing with host page sizes larger
than the target page size can be reused for this case.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170224182844.32452-4-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
ef08fb389f postcopy: Transmit and compare individual page sizes
When using postcopy with hugepages, we require the source
and destination page sizes for any RAMBlock to match; note
that different RAMBlocks in the same VM can have different
page sizes.

Transmit them as part of the RAM information header and
fail if there's a difference.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170224182844.32452-3-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
e8ca1db29b postcopy: Transmit ram size summary word
Replace the host page-size in the 'advise' command by a pagesize
summary bitmap; if the VM is just using normal RAM then
this will be exactly the same as before, however if they're using
huge pages they'll be different, and thus:
   a) Migration from/to old qemu's that don't understand huge pages
      will fail early.
   b) Migrations with different size RAMBlocks will also fail early.

This catches it very early; earlier than the detailed per-block
check in the next patch.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20170224182844.32452-2-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Vladimir Sementsov-Ogievskiy
f9c8caa04f migration: fix use-after-free of to_dst_file
hmp_savevm calls qemu_savevm_state(f), which sets to_dst_file=f in
global migration state. Then hmp_savevm closes f (g_free called).

Next access to to_dst_file in migration state (for example,
qmp_migrate_set_speed) will use it after it was freed.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170225193155.447462-5-vsementsov@virtuozzo.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:23 +00:00
Dr. David Alan Gilbert
5f9412bbac migration: Update docs to discourage version bumps
Version bumps break backwards migration; update the docs
to explain to people that's bad and how to avoid it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20170210110359.8210-1-dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Marc-André Lureau
128e4e1089 migration: fix id leak regression
This leak was introduced in commit
581f08bac2.

(it stands out quickly with ASAN once the rest of the leaks are also
removed from make check with this series)

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Juan Quintela <quintela@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170221141451.28305-31-marcandre.lureau@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Ashijeet Acharya
7562f90707 migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable
Commit a3a3d8c7 introduced a segfault bug while checking for
'dc->vmsd->unmigratable' which caused QEMU to crash when trying to add
devices which do no set their 'dc->vmsd' yet while initialization.
Place a 'dc->vmsd' check prior to it so that we do not segfault for
such devices.

NOTE: This doesn't compromise the functioning of --only-migratable
option as all the unmigratable devices do set their 'dc->vmsd'.

Introduce a new function check_migratable() and move the
only_migratable check inside it, also use stubs to avoid user-mode qemu
build failures.

Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
Message-Id: <1487009088-23891-1-git-send-email-ashijeetacharya@gmail.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Laurent Vivier
9cd49026aa vmstate-static-checker: update white list with spapr_pci
To fix migration between 2.7 and 2.8, some fields have
been renamed and managed with the help of a PHB property
(pre_2_8_migration):

    5c4537b spapr: Fix 2.7<->2.8 migration of PCI host bridge

So we need to add them to the white list:

    dma_liobn[0],
    mem_win_addr, mem_win_size,
    io_win_addr, io_win_size

become

    mig_liobn,
    mig_mem_win_addr, mig_mem_win_size,
    mig_io_win_addr, mig_io_win_size

CC: David Gibson <david@gibson.dropbear.id.au>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Thomas Huth <thuth@redhat.com>
CC: Greg Kurz <groug@kaod.org>
CC: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170214133331.28997-1-lvivier@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Halil Pasic
4333309961 tests/test-vmstate.c: test array of ptr to primitive
Let's have a test for ptr arrays to some primitive type with some
not-null and null ptrs intermixed.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Message-Id: <20170222160119.52771-6-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:30:22 +00:00
Halil Pasic
cc95883185 tests/test-vmstate.c: test array of ptr with null
Add test for VMSTATE_ARRAY_OF_POINTER_TO_STRUCT with an array
containing some null pointer.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-5-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
   Fixed type case in assert to uintptr_t rather than uint64_t
2017-02-28 11:30:06 +00:00
Halil Pasic
07d4e69147 migration/vmstate: fix array of ptr with nullptrs
Make VMS_ARRAY_OF_POINTER cope with null pointers. Previously the
reward for trying to migrate an array with some null pointers in it was
an illegal memory access, that is a swift and painless death of the
process.  Let's make vmstate cope with this scenario.

The general approach is, when we encounter a null pointer (element),
instead of following the pointer to save/load the data behind it, we
save/load a placeholder. This way we can detect if we expected a null
pointer at the load side but not null data was saved instead.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Guenther Hutzl <hutzl@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-4-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Halil Pasic
cbfda0e6cf migration/vmstate: split up vmstate_base_addr
Currently vmstate_base_addr does several things: it pinpoints the field
within the struct, possibly allocates memory and possibly does the first
pointer dereference. Obviously allocation is needed only for load.

Let us split up the functionality in vmstate_base_addr and move the
address manipulations (that is everything but the allocation logic) to
load and save so it becomes more obvious what is actually going on. Like
this all the address calculations (and the handling of the flags
controlling these) is in one place and the sequence is more obvious.

The newly introduced function vmstate_handle_alloc also fixes the
allocation for the unused VMS_VBUFFER|VMS_MULTIPLY|VMS_ALLOC scenario
and is substantially simpler than the original vmstate_base_addr.

In load and save some asserts are added so it's easier to debug
situations where we would end up with a null pointer dereference.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-3-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Halil Pasic
e84641f73d migration/vmstate: renames in (load|save)_state
The vmstate_(load|save)_state start out with an a void *opaque pointing
to some struct, and manipulate one or more elements of one field within
that struct.

First the field within the struct is pinpointed as opaque + offset, then
if this is a pointer the pointer is dereferenced to obtain a pointer to
the first element of the vmstate field. Pointers to further elements if
any are calculated as first_element + i * element_size (where i is the
zero based index of the element in question).

Currently base_addr and addr is used as a variable name for the pointer
to the first element and the pointer to the current element being
processed. This is suboptimal because base_addr is somewhat
counter-intuitive (because obtained as base + offset) and both base_addr
and addr not very descriptive (that we have a pointer should be clear
from the fact that it is declared as a pointer).

Let make things easier to understand by renaming base_addr to first_elem
and addr to curr_elem. This has the additional benefit of harmonizing
with other names within the scope (n_elems, vmstate_n_elems).

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170222160119.52771-2-pasic@linux.vnet.ibm.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Daniel Henrique Barboza
87c9cc1c30 Changing error message of QMP 'migrate_set_downtime' to seconds
Using QMP, the error message of 'migrate_set_downtime' was displaying
the values in milliseconds, being misleading with the command that
accepts the value in seconds:

{ "execute": "migrate_set_downtime", "arguments": {"value": 3000}}
{"error": {"class": "GenericError", "desc": "Parameter 'downtime_limit'
expects an integer in the range of 0 to 2000000 milliseconds"}}

This message is also seen in HMP when trying to set the same
parameter:

(qemu) migrate_set_parameter downtime-limit 3000000
Parameter 'downtime_limit' expects an integer in the range of 0 to
2000000 milliseconds

To allow for a proper error message when using QMP, a validation
of the user input was added in 'qmp_migrate_set_downtime'.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Message-Id: <20170222151729.5812-1-danielhb@linux.vnet.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-28 11:29:00 +00:00
Cornelia Huck
e8ebf60f6d pc-bios/s390-ccw.img: rebuild image
Contains the following commits:
- pc-bios/s390-ccw: Use the ccw bios to start the network boot

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
99b72e0fbb pc-bios/s390-ccw: Use the ccw bios to start the network boot
We want to use the ccw bios to start final network boot. To do
this we use ccw bios to detect if the boot device is a virtio
network device and retrieve the start address of the
network boot image.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
f38b5b7fc4 s390x/ipl: Load network boot image
Load the network boot image into guest RAM when the boot
device selected is a network device. Use some of the reserved
space in IplBlockCcw to store the start address of the netboot
image.

A user could also use 'chreipl'(diag 308/5) to change the boot device.
So every time we update the IPLB, we need to verify if the selected
boot device is a network device so we can appropriately load the
network boot image.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
5f31ade055 s390x/ipl: Extend S390IPLState to support network boot
Add new field to S390IPLState to store the name of the network boot
loader.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2017-02-28 12:04:48 +01:00
Farhan Ali
34f1b23f8a elf-loader: Allow late loading of elf
The current QEMU ROM infrastructure rejects late loading of ROMs.
And ELFs are currently loaded as ROM, this prevents delayed loading
of ELFs. So when loading ELF, allow the user to specify if ELF should
be loaded as ROM or not.

If an ELF is not loaded as ROM, then they are not restored on a
guest reboot/reset and so its upto the user to handle the reloading.

Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-28 12:04:48 +01:00
Marc-André Lureau
e3d90312b3 tests: fix leaks in test-io-channel-command
No need for strdup, fix leaks when socat is missing.

Spotted by ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-28 10:36:32 +00:00
Daniel P. Berrange
cd892a2efc io: fix decoding when multiple websockets frames arrive at once
The qio_channel_websock_read_wire() method will read upto 4096
bytes off the socket and then decode the websockets header and
payload. The code was only decoding a single websockets frame,
even if the buffered data contained multiple frames. This meant
that decoding of subsequent frames was delayed until further
input arrived on the socket. This backlog of delayed frames
gets worse & worse over time.

Symptom was that when connecting to the VNC server via the
built-in websockets server, mouse/keyboard interaction would
start out fine, but slowly get more & more delayed until it
was unusable.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-28 10:36:31 +00:00
Greg Kurz
c23d5f1d5b 9pfs: local: drop unused code
Now that the all callbacks have been converted to use "at" syscalls, we
can drop this code.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a565fea565 9pfs: local: open2: don't follow symlinks
The local_open2() callback is vulnerable to symlink attacks because it
calls:

(1) open() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_open2() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively. Since local_open2() already opens
a descriptor to the target file, local_set_cred_passthrough() is
modified to reuse it instead of opening a new one.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to openat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
3f3a16990b 9pfs: local: mkdir: don't follow symlinks
The local_mkdir() callback is vulnerable to symlink attacks because it
calls:

(1) mkdir() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mkdir() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mkdirat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d815e72190 9pfs: local: mknod: don't follow symlinks
The local_mknod() callback is vulnerable to symlink attacks because it
calls:

(1) mknod() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mknod() to rely on opendir_nofollow() and
mknodat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

A new local_set_cred_passthrough() helper based on fchownat() and
fchmodat_nofollow() is introduced as a replacement to
local_post_create_passthrough() to fix (4).

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mknodat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
38771613ea 9pfs: local: symlink: don't follow symlinks
The local_symlink() callback is vulnerable to symlink attacks because it
calls:

(1) symlink() which follows symbolic links for all path elements but the
    rightmost one
(2) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(3) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(4) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_symlink() to rely on opendir_nofollow() and
symlinkat() to fix (1), openat(O_NOFOLLOW) to fix (2), as well as
local_set_xattrat() and local_set_mapped_file_attrat() to fix (3) and
(4) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d369f20763 9pfs: local: chown: don't follow symlinks
The local_chown() callback is vulnerable to symlink attacks because it
calls:

(1) lchown() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_chown() to rely on open_nofollow() and
fchownat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
e3187a45dd 9pfs: local: chmod: don't follow symlinks
The local_chmod() callback is vulnerable to symlink attacks because it
calls:

(1) chmod() which follows symbolic links for all path elements
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

We would need fchmodat() to implement AT_SYMLINK_NOFOLLOW to fix (1). This
isn't the case on linux unfortunately: the kernel doesn't even have a flags
argument to the syscall :-\ It is impossible to fix it in userspace in
a race-free manner. This patch hence converts local_chmod() to rely on
open_nofollow() and fchmod(). This fixes the vulnerability but introduces
a limitation: the target file must readable and/or writable for the call
to openat() to succeed.

It introduces a local_set_xattrat() replacement to local_set_xattr()
based on fsetxattrat() to fix (2), and a local_set_mapped_file_attrat()
replacement to local_set_mapped_file_attr() based on local_fopenat()
and mkdirat() to fix (3). No effort is made to factor out code because
both local_set_xattr() and local_set_mapped_file_attr() will be dropped
when all users have been converted to use the "at" versions.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
ad0b46e6ac 9pfs: local: link: don't follow symlinks
The local_link() callback is vulnerable to symlink attacks because it calls:

(1) link() which follows symbolic links for all path elements but the
    rightmost one
(2) local_create_mapped_attr_dir()->mkdir() which follows symbolic links
    for all path elements but the rightmost one

This patch converts local_link() to rely on opendir_nofollow() and linkat()
to fix (1), mkdirat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
6dd4b1f1d0 9pfs: local: improve error handling in link op
When using the mapped-file security model, we also have to create a link
for the metadata file if it exists. In case of failure, we should rollback.

That's what this patch does.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
d2767edec5 9pfs: local: rename: use renameat
The local_rename() callback is vulnerable to symlink attacks because it
uses rename() which follows symbolic links in all path elements but the
rightmost one.

This patch simply transforms local_rename() into a wrapper around
local_renameat() which is symlink-attack safe.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
99f2cf4b2d 9pfs: local: renameat: don't follow symlinks
The local_renameat() callback is currently a wrapper around local_rename()
which is vulnerable to symlink attacks.

This patch rewrites local_renameat() to have its own implementation, based
on local_opendir_nofollow() and renameat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
f9aef99b3e 9pfs: local: lstat: don't follow symlinks
The local_lstat() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) getxattr() which follows symbolic links in all path elements
(3) local_mapped_file_attr()->local_fopen()->openat(O_NOFOLLOW) which
    follows symbolic links in all path elements but the rightmost
    one

This patch converts local_lstat() to rely on opendir_nofollow() and
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1), fgetxattrat_nofollow() to
fix (2).

A new local_fopenat() helper is introduced as a replacement to
local_fopen() to fix (3). No effort is made to factor out code
because local_fopen() will be dropped when all users have been
converted to call local_fopenat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
bec1e9546e 9pfs: local: readlink: don't follow symlinks
The local_readlink() callback is vulnerable to symlink attacks because it
calls:

(1) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(2) readlink() which follows symbolic links for all path elements but the
    rightmost one

This patch converts local_readlink() to rely on open_nofollow() to fix (1)
and opendir_nofollow(), readlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
ac125d993b 9pfs: local: truncate: don't follow symlinks
The local_truncate() callback is vulnerable to symlink attacks because
it calls truncate() which follows symbolic links in all path elements.

This patch converts local_truncate() to rely on open_nofollow() and
ftruncate() instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
31e51d1c15 9pfs: local: statfs: don't follow symlinks
The local_statfs() callback is vulnerable to symlink attacks because it
calls statfs() which follows symbolic links in all path elements.

This patch converts local_statfs() to rely on open_nofollow() and fstatfs()
instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a33eda0dd9 9pfs: local: utimensat: don't follow symlinks
The local_utimensat() callback is vulnerable to symlink attacks because it
calls qemu_utimens()->utimensat(AT_SYMLINK_NOFOLLOW) which follows symbolic
links in all path elements but the rightmost one or qemu_utimens()->utimes()
which follows symbolic links for all path elements.

This patch converts local_utimensat() to rely on opendir_nofollow() and
utimensat(AT_SYMLINK_NOFOLLOW) directly instead of using qemu_utimens().
It is hence assumed that the OS supports utimensat(), i.e. has glibc 2.6
or higher and linux 2.6.22 or higher, which seems reasonable nowadays.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
a0e640a872 9pfs: local: remove: don't follow symlinks
The local_remove() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) remove() which follows symbolic links in all path elements but the
    rightmost one

This patch converts local_remove() to rely on opendir_nofollow(),
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1) and unlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
df4938a665 9pfs: local: unlinkat: don't follow symlinks
The local_unlinkat() callback is vulnerable to symlink attacks because it
calls remove() which follows symbolic links in all path elements but the
rightmost one.

This patch converts local_unlinkat() to rely on opendir_nofollow() and
unlinkat() instead.

Most of the code is moved to a separate local_unlinkat_common() helper
which will be reused in a subsequent patch to fix the same issue in
local_remove().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
72f0d0bf51 9pfs: local: lremovexattr: don't follow symlinks
The local_lremovexattr() callback is vulnerable to symlink attacks because
it calls lremovexattr() which follows symbolic links in all path elements
but the rightmost one.

This patch introduces a helper to emulate the non-existing fremovexattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lremovexattr().

local_lremovexattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
3e36aba757 9pfs: local: lsetxattr: don't follow symlinks
The local_lsetxattr() callback is vulnerable to symlink attacks because
it calls lsetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fsetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lsetxattr().

local_lsetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
5507904e36 9pfs: local: llistxattr: don't follow symlinks
The local_llistxattr() callback is vulnerable to symlink attacks because
it calls llistxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing flistxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to llistxattr().

local_llistxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
56ad3e54da 9pfs: local: lgetxattr: don't follow symlinks
The local_lgetxattr() callback is vulnerable to symlink attacks because
it calls lgetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fgetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lgetxattr().

local_lgetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
996a0d76d7 9pfs: local: open/opendir: don't follow symlinks
The local_open() and local_opendir() callbacks are vulnerable to symlink
attacks because they call:

(1) open(O_NOFOLLOW) which follows symbolic links in all path elements but
    the rightmost one
(2) opendir() which follows symbolic links in all path elements

This patch converts both callbacks to use new helpers based on
openat_nofollow() to only open files and directories if they are
below the virtfs shared folder

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
0e35a37829 9pfs: local: keep a file descriptor on the shared folder
This patch opens the shared folder and caches the file descriptor, so that
it can be used to do symlink-safe path walk.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:15 +01:00
Greg Kurz
6482a96163 9pfs: introduce relative_openat_nofollow() helper
When using the passthrough security mode, symbolic links created by the
guest are actual symbolic links on the host file system.

Since the resolution of symbolic links during path walk is supposed to
occur on the client side. The server should hence never receive any path
pointing to an actual symbolic link. This isn't guaranteed by the protocol
though, and malicious code in the guest can trick the server to issue
various syscalls on paths whose one or more elements are symbolic links.
In the case of the "local" backend using the "passthrough" or "none"
security modes, the guest can directly create symbolic links to arbitrary
locations on the host (as per spec). The "mapped-xattr" and "mapped-file"
security modes are also affected to a lesser extent as they require some
help from an external entity to create actual symbolic links on the host,
i.e. another guest using "passthrough" mode for example.

The current code hence relies on O_NOFOLLOW and "l*()" variants of system
calls. Unfortunately, this only applies to the rightmost path component.
A guest could maliciously replace any component in a trusted path with a
symbolic link. This could allow any guest to escape a virtfs shared folder.

This patch introduces a variant of the openat() syscall that successively
opens each path element with O_NOFOLLOW. When passing a file descriptor
pointing to a trusted directory, one is guaranteed to be returned a
file descriptor pointing to a path which is beneath the trusted directory.
This will be used by subsequent patches to implement symlink-safe path walk
for any access to the backend.

Symbolic links aren't the only threats actually: a malicious guest could
change a path element to point to other types of file with undesirable
effects:
- a named pipe or any other thing that would cause openat() to block
- a terminal device which would become QEMU's controlling terminal

These issues can be addressed with O_NONBLOCK and O_NOCTTY.

Two helpers are introduced: one to open intermediate path elements and one
to open the rightmost path element.

Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(renamed openat_nofollow() to relative_openat_nofollow(),
 assert path is relative and doesn't contain '//',
 fixed side-effect in assert, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 11:21:15 +01:00
Greg Kurz
21328e1e57 9pfs: remove side-effects in local_open() and local_opendir()
If these functions fail, they should not change *fs. Let's use local
variables to fix this.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Greg Kurz
00c90bd1c2 9pfs: remove side-effects in local_init()
If this function fails, it should not modify *ctx.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Greg Kurz
56fc494bdc 9pfs: local: move xattr security ops to 9p-xattr.c
These functions are always called indirectly. It really doesn't make sense
for them to sit in a header file.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-28 11:21:14 +01:00
Pradeep Jagadeesh
a2a7862ca9 throttle: factor out duplicate code
This patch removes the redundant throttle code that was present in
block and fsdev device files. Now the common code is moved
to a single file.

Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
(fix indent nit, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Pradeep Jagadeesh
b8bbdb886e fsdev: add IO throttle support to fsdev devices
This patchset adds the throttle support for the 9p-local driver.
For now this functionality can be enabled only through qemu cli options.
QMP interface and support to other drivers need further extensions.
To make it simple for other 9p drivers, the throttle code has been put in
separate files.

Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
(pass extra NULL CoMutex * argument to qemu_co_queue_wait(),
 added options to qemu-options.hx, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Paolo Bonzini
4bae2b397f 9pfs: fix v9fs_lock error case
In this case, we are marshaling an error status instead of the errno value.
Reorganize the out and out_nofid labels to look like all the other cases.
Coverity reports this because the "err = -ENOENT" and "err = -EINVAL"
assignments above are dead, overwritten by the call to pdu_marshal.

(Coverity issues CID1348512 and CID1348513)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(also open-coded the success path since locking is a nop for us, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Gerd Hoffmann
8779fccbef seabios: update to 1.10.2 release
git shortlog rel-1.10.1..rel-1.10.2
===================================

Ben Warren (5):
      QEMU DMA: Add DMA write capability
      romfile-loader: Switch to using named structs
      QEMU fw_cfg: Add command to write back address of file
      QEMU fw_cfg: Add functions for accessing files by key
      QEMU fw_cfg: Write fw_cfg back on S3 resume

Kevin O'Connor (1):
      ps2port: Disable keyboard/mouse prior to resetting ps2 controller

Ladi Prosek (1):
      ahci: Set upper 32-bit registers to zero

Paul Menzel (1):
      vgasrc: Increase debug level

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-28 09:54:23 +01:00
Peter Maydell
6181478f63 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Mon 27 Feb 2017 16:33:23 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  tests-aio-multithread: use atomic_read properly
  iscsi: do not use aio_context_acquire/release
  nfs: do not use aio_context_acquire/release
  curl: do not use aio_context_acquire/release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 08:46:03 +00:00
Franklin \"Snaipe\" Mathieu
f5507e0448 syscall: fixed mincore(2) not failing with ENOMEM
The current implementation of the mincore(2) syscall sets errno to
EFAULT when the region identified by the first two parameters is
invalid.

This goes against the man page specification, where mincore(2) should
only fail with EFAULT when the third parameter is an invalid address;
and fail with ENOMEM when the checked region does not point to mapped
memory.

Signed-off-by: Franklin "Snaipe" Mathieu <snaipe@diacritic.io>
Cc: Riku Voipio <riku.voipio@linaro.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Igor Pavlikevich
6c608953a5 hw/acpi/tco.c: fix tco timer stop
TCO timer does not actually stop

Signed-off-by: Igor Pavlikevich <ipavlikevich@gmail.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Peter Maydell
3d74ee7dca lm32: milkymist-tmu2: fix a third integer overflow
Don't truncate the multiplication and do a 64 bit one instead
because the result is stored in a 64 bit variable.

This fixes a similar coverity warning to commits 237a8650d6 and
4382fa6554, in a similar way, and is the final third of the fix for
coverity CID 1167561 (hopefully!).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Acked-by: Michael Walle <michael@walle.cc>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Vincenzo Maffione
79cad2faac qemu-options.hx: add missing id=chr0 chardev argument in vhost-user example
In the vhost-user example, a chardev with id chr0 is referenced by the
vhost-user net backend, but the id is not specified in the chardev option.

Signed-off-by: Vincenzo Maffione <v.maffione@gmail.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Marc-André Lureau
d34d5b3bbd Update copyright year
It's still time to wish happy new year!

The Year of the Rooster will begin on January 28, 2017!

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Thomas Huth
6b591ad613 tests/prom-env: Enable the test for the sun4u machine, too
The 32-bit TCG bug has been fixed a while ago, so we can enable
this test for sparc64 now, too. Unfortunately, OpenBIOS does not
work with the sun4v machine anymore (it needs to catch up with the
improved emulation), so we can only enable this test for the sun4u
machine right now.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Fam Zheng
8ea1d05632 cadence_gem: Remove unused parameter debug message
Reported by cppcheck.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Philippe Mathieu-Daudé
4729b3a41d register: fix incorrect read mask
The register_read() and register_write() functions expect a bitmask argument.
To avoid duplicated code, a new inlined function register_enabled_mask() is
introduced.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
John Snow
6048018ef6 ide: remove undefined behavior in ide-test
trivial: initialize the dirty buffer with a random-ish byte.
Stops valgrind from whining about uninitialized buffers.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Peter Maydell
25ac5bbec4 CODING_STYLE: Mention preferred comment form
Our defacto coding style strongly prefers /* */ style comments
over the single-line // style, and checkpatch enforces this,
but we don't actually document this. Mention it in CODING_STYLE.

Suggested-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Thomas Huth
a6e3707ece hw/core/register: Mark the device with cannot_instantiate_with_device_add_yet
The "qemu,register" device needs to be wired up in source code, there
is no way the user can make any real use of this device with the
"-device" parameter or the "device_add" monitor command yet.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Thomas Huth
a70716eb2c hw/core/or-irq: Mark the device with cannot_instantiate_with_device_add_yet
The "or-irq" device needs to be wired up in source code, there is no
way the user can make any real use of this device with the "-device"
parameter or the "device_add" monitor command yet.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Peter Maydell
d000b477f2 softfloat: Use correct type in float64_to_uint64_round_to_zero()
In float64_to_uint64_round_to_zero() a typo meant that we were
taking the uint64_t return value from float64_to_uint64() and
putting it into an int64_t variable before returning it as
uint64_t again. Use uint64_t instead of pointlessly casting it
back and forth to int64_t.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Stefan Weil
8dc52350f9 target/s390x: Fix typo
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Jeff Cody
51654aa52a iscsi: add missing colons to the qapi docs
The missing colons make the iscsi part of the documentation not render
quite as nicely, so add those in.

Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-27 23:33:41 -05:00
Richard Henderson
5ee4f3c2c7 target/alpha: Enable MTTCG by default
Alpha has a weak memory ordering and issues all of the required barriers.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-28 11:41:46 +11:00
Franklin \"Snaipe\" Mathieu
98a3331a55 syscall: fixed mincore(2) not failing with ENOMEM
The current implementation of the mincore(2) syscall sets errno to
EFAULT when the region identified by the first two parameters is
invalid.

This goes against the man page specification, where mincore(2) should
only fail with EFAULT when the third parameter is an invalid address;
and fail with ENOMEM when the checked region does not point to mapped
memory.

Signed-off-by: Franklin "Snaipe" Mathieu <snaipe@diacritic.io>
Cc: Riku Voipio <riku.voipio@linaro.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170217085800.28873-2-snaipe@diacritic.io>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Michael Karcher
59ebb6e451 linux-user: fix do_rt_sigreturn on m68k linux userspace emulation
do_rt_sigreturn uses an uninitialised local variable instead of fetching
the old signal mask directly from the signal frame when restoring the mask,
so the signal mask is undefined after do_rt_sigreturn. As the signal
frame data is in target-endian order, target_to_host_sigset instead of
target_to_host_sigset_internal is required.

do_sigreturn is correct in using target_to_host_sigset_internal, because
get_user already did the endianness conversion.

Signed-off-by: Michael Karcher <karcher@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170225110517.2832-3-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Laurent Vivier
3219de458c linux-user: correctly manage SR in ucontext
Use cpu_m68k_get_ccr()/cpu_m68k_set_ccr() to setup and restore correctly
the value of SR in the ucontext structure

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170225110517.2832-2-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Pranith Kumar
1c1df0198b linux-user: Add signal handling support for x86_64
Note that x86_64 has only _rt signal handlers. This implementation
attempts to share code with the x86_32 implementation.

CC: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Allan Wirth <awirth@akamai.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170226165345.8757-1-bobby.prani@gmail.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Helge Deller
ee1ac3a182 linux-user: Add sockopts for IPv6 ping and IPv6 traceroute
Add the neccessary sockopts for ping and traceroute on IPv6.

This fixes the following qemu warnings with IPv6:
Unsupported ancillary data: 0/2
Unsupported ancillary data: 0/11
Unsupported ancillary data: 41/25
Unsupported setsockopt level=0 optname=12
Unsupported setsockopt level=41 optname=16
Unsupported setsockopt level=41 optname=25
Unsupported setsockopt level=41 optname=50
Unsupported setsockopt level=41 optname=51
Unsupported setsockopt level=41 optname=8
Unsupported setsockopt level=58 optname=1

Tested with hppa-linux-user (big-endian) on x86_64 (little-endian).

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170218223130.GA25278@ls3530.fritz.box>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Laurent Vivier
7eddb5ddac linux-user: fix fork()
Since commit 5ea2fc8 ("linux-user: Sanity check clone flags"),
trying to run fork() fails with old distro on some architectures.

This is the case with HP-PA and Debian 5 (Lenny).

It fails on:

         if ((flags & CSIGNAL) != TARGET_SIGCHLD) {
             return -TARGET_EINVAL;
         }

because flags is 17, whereas on HP-PA, SIGCHLD is 18.
17 is the SIGCHLD value of my host (x86_64).

It appears that for TARGET_NR_fork and TARGET_NR_vfork, QEMU calls
do_fork() with SIGCHLD instead of TARGET_SIGCHLD.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170216173707.16209-1-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Peter Maydell
9b9fbe8a4e Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170227-1' into staging
gtk: fix kbd on xwayland
vnc: fix double free issues
opengl improvements

# gpg: Signature made Mon 27 Feb 2017 16:11:30 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170227-1:
  vnc: fix double free issues
  spice: add display & head options
  ui: Use XkbGetMap and XkbGetNames instead of XkbGetKeyboard
  gtk-egl: add scanout_disable support
  sdl2: add scanout_disable support
  spice: add scanout_disable support
  virtio-gpu: use dpy_gl_scanout_disable
  console: add dpy_gl_scanout_disable
  console: rename dpy_gl_scanout to dpy_gl_scanout_texture

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 19:19:46 +00:00
John Snow
39c11580f3 block/mirror: fix broken sparseness detection
int64_t is in all likelihood the actual scalar type we want.
Yep, really.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1219541

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-27 14:02:31 -05:00
Eduardo Habkost
b8097deb35 i386: Improve query-cpu-model-expansion full mode
This keeps the same results on type=static expansion, but make
type=full expansion return every single QOM property on the CPU
object that have a different value from the "base' CPU model,
plus all the CPU feature flag properties.

Cc: Jiri Denemark <jdenemar@redhat.com>
Message-Id: <20170222190029.17243-4-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:35 -03:00
Eduardo Habkost
f99fd7ca2a i386: Implement query-cpu-model-expansion QMP command
Implement query-cpu-model-expansion for target-i386.

This should meet all the requirements while being simple. In the
case of static expansion, it will use the new "base" CPU model,
and in the case of full expansion, it will keep the original CPU
model name+props, and append extra properties.

A future follow-up should improve the implementation of
type=full, so that it returns more detailed data, including every
writable QOM property in the CPU object.

Cc: libvir-list@redhat.com
Cc: Jiri Denemark <jdenemar@redhat.com>
Message-Id: <20170222190029.17243-3-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:31 -03:00
Eduardo Habkost
5adbed3088 i386: Define static "base" CPU model
The query-cpu-model-expand QMP command needs at least one static
model, to allow the "static" expansion mode to be implemented.
Instead of defining static versions of every CPU model, define a
"base" CPU model that has absolutely no feature flag enabled.

Despite having no CPUID data set at all, "-cpu base" is even a
functional CPU:

* It can boot a Slackware Linux 1.01 image with a Linux 0.99.12
  kernel[1].
* It is even possible to boot[2] a modern Fedora x86_64 guest by
  manually enabling the following CPU features:
  -cpu base,+lm,+msr,+pae,+fpu,+cx8,+cmov,+sse,+sse2,+fxsr

[1] http://www.qemu-advent-calendar.org/2014/#day-1
[2] This is what can be seen in the guest:
    [root@localhost ~]# cat /proc/cpuinfo
    processor       : 0
    vendor_id       : unknown
    cpu family      : 0
    model           : 0
    model name      : 00/00
    stepping        : 0
    physical id     : 0
    siblings        : 1
    core id         : 0
    cpu cores       : 1
    apicid          : 0
    initial apicid  : 0
    fpu             : yes
    fpu_exception   : yes
    cpuid level     : 1
    wp              : yes
    flags           : fpu msr pae cx8 cmov fxsr sse sse2 lm nopl
    bugs            :
    bogomips        : 5832.70
    clflush size    : 64
    cache_alignment : 64
    address sizes   : 36 bits physical, 48 bits virtual
    power management:

    [root@localhost ~]# x86info -v -a
    x86info v1.30.  Dave Jones 2001-2011
    Feedback to <davej@redhat.com>.

    No TSC, MHz calculation cannot be performed.
    Unknown vendor (0)
    MP Table:

    Family: 0 Model: 0 Stepping: 0
    CPU Model (x86info's best guess):

    eax in: 0x00000000, eax = 00000001 ebx = 00000000 ecx = 00000000 edx = 00000000
    eax in: 0x00000001, eax = 00000000 ebx = 00000800 ecx = 00000000 edx = 07008161

    eax in: 0x80000000, eax = 80000001 ebx = 00000000 ecx = 00000000 edx = 00000000
    eax in: 0x80000001, eax = 00000000 ebx = 00000000 ecx = 00000000 edx = 20000000

    Feature flags:
     fpu            Onboard FPU
     msr            Model-Specific Registers
     pae            Physical Address Extensions
     cx8            CMPXCHG8 instruction
     cmov           CMOV instruction
     fxsr           FXSAVE and FXRSTOR instructions
     sse            SSE support
     sse2           SSE2 support

    Long NOPs supported: yes

    Address sizes : 0 bits physical, 0 bits virtual
    0MHz processor (estimate).

     running at an estimated 0MHz
    [root@localhost ~]#

Message-Id: <20170222190029.17243-2-ehabkost@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:27 -03:00
Eduardo Habkost
0bacd8b304 i386: Don't set CPUClass::cpu_def on "max" model
Host CPUID info is used by the "max" CPU model only in KVM mode.
Move the initialization of CPUID data for "max" from class_init
to instance_init, and don't set CPUClass::cpu_def for "max".

Message-Id: <20170222183919.11928-4-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:25 -03:00
Eduardo Habkost
6900d1cc8a i386: Make "max" model not use any host CPUID info on TCG
Instead of reporting host CPUID data on "max", use the qemu64 CPU
model as reference to initialize CPUID
vendor/family/model/stepping/model-id.

Message-Id: <20170222183919.11928-3-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:23 -03:00
Eduardo Habkost
c62f2630f8 i386: Create "max" CPU model
Rename the existing "host" CPU model to "max, and set it to
kvm_enabled=false. The new "max" CPU model will be able to enable
all features supported by TCG out of the box, because its logic
is based on x86_cpu_get_supported_feature_word(), which already
works with TCG.

A new KVM-specific "host" class was added, that simply inherits
everything from "max" except the 'ordering' and 'description'
fields.

Message-Id: <20170222183919.11928-2-ehabkost@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:21 -03:00
Eduardo Habkost
a357a65b66 qapi-schema: Comment about full expansion of non-migration-safe models
Add a note warning that static expansion may not be 100% accurate
when the CPU model is not migration-safe. This will be the case
on x86 when expansing the "host" CPU model, because there are
"host" features that can't have a migration-safe representation
(e.g. "host-cache-info").

Message-Id: <20170116211124.29245-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:13 -03:00
Eduardo Habkost
b8d834a00f i386: Reorganize and document CPUID initialization steps
CPU runnability checks and CPU model expansion have slightly
different requirements. Document the steps involved in loading a
CPU model and realizing a CPU, so their requirements and purpose
are clearly defined.

This patch doesn't change any implementation. It just add
comments, rename the x86_cpu_load_features() function for clarity
(so it won't be confused with x86_cpu_load_def()), and move
x86_cpu_filter_features() closer to it.

Message-Id: <20170116211124.29245-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:09 -03:00
Eduardo Habkost
44bd8e5306 i386: Rename X86CPU::host_features to X86CPU::max_features
Rename the field and add a small comment to make its purpose
clearer.

Message-Id: <20170119210449.11991-4-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:06 -03:00
Eduardo Habkost
f48c883703 i386: Add ordering field to CPUClass
Instead of using kvm_enabled to order the "-cpu help" list, use a
new "ordering" field for that.

Message-Id: <20170119210449.11991-3-ehabkost@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:23:03 -03:00
Eduardo Habkost
771a13e90d i386: Unset cannot_destroy_with_object_finalize_yet on "host" model
The class is now safe because the assert(kvm_enabled()) line was
removed by commit e435601058.

Message-Id: <20170119210449.11991-2-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-02-27 13:22:58 -03:00
Peter Maydell
8f2d7c3411 Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2017-02-27-1' into staging
Merge qcrypto 2017/02/27 v1

# gpg: Signature made Mon 27 Feb 2017 13:37:34 GMT
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2017-02-27-1:
  crypto: assert cipher algorithm is always valid
  crypto: fix leak in ivgen essiv init

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 15:33:21 +00:00
Paolo Bonzini
1ab17f9f5c tests-aio-multithread: use atomic_read properly
nodes[id].next is written by other threads.  If atomic_read is not used
(matching atomic_set in mcs_mutex_lock!) the compiler can optimize the
whole "if" away!

Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Greg Kurz <groug@kaod.org>
Message-id: 20170227111726.9237-1-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 14:00:53 +00:00
Paolo Bonzini
d045c466d9 iscsi: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libiscsi calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:58:58 +00:00
Paolo Bonzini
37d1e4d9bf nfs: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libnfs calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:58:53 +00:00
Prasad J Pandit
32c813e6c2 crypto: assert cipher algorithm is always valid
Crypto routines 'qcrypto_cipher_get_block_len' and
'qcrypto_cipher_get_key_len' return non-zero cipher block and key
lengths from static arrays 'alg_block_len[]' and 'alg_key_len[]'
respectively. Returning 'zero(0)' value from either of them would
likely lead to an error condition.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-27 13:37:14 +00:00
Li Qiang
0072d2a9fc crypto: fix leak in ivgen essiv init
On error path, the 'salt' doesn't been freed thus leading
a memory leak. This patch avoid this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-27 13:37:14 +00:00
Paolo Bonzini
ba3186c4e4 curl: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect BDRVCURLState access with a QemuMutex.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:33:24 +00:00
Paolo Bonzini
55ac0a9bf4 cpu-exec: remove unnecessary check of cpu->exit_request
The cpu->exit_request check in cpu_loop_exec_tb is unnecessary,
because cpu->tcg_exit_req is always set after cpu->exit_request.
So let the TB exit and we will pick up the exit request later
in cpu_handle_interrupt.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-24 15:51:19 +01:00
Pavel Dovgalyuk
cfb2d02be9 replay: check icount in cpu exec loop
This patch adds check to break cpu loop when icount expires without
setting the TB_EXIT_ICOUNT_EXPIRED flag. It happens when there is no
available translated blocks and all instructions were executed.
In icount replay mode unnecessary tb_find will be called (which may
cause an exception) and execution will be non-deterministic.
Because cpu_loop_exec_tb cannot longjmp anymore, we can remove
the anticipated call to align_clocks in cpu_loop_exec_tb, as
well as the SyncClocks *sc argument.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <002801d2810f$18809c20$4981d460$@ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Pavel Dovgalyuk <dovgaluk@ispras.ru>
2017-02-24 15:51:19 +01:00
Max Filippov
cb3825b9af target/xtensa: add two missing headers to core import script
Include qemu/osdep.h and qemu-common.h at the beginning of imported
xtensa core source file.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2017-02-23 10:50:56 -08:00
Max Filippov
b68755c142 target/xtensa: sim: instantiate local memories
Xtensa core may have a number of RAM and ROM areas configured. Record
their size and location from the core configuration overlay and
instantiate them as RAM regions in the SIM machine.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2017-02-23 10:30:41 -08:00
Paolo Bonzini
1aab16c28a cpu-exec: unify icount_decr and tcg_exit_req
The icount interrupt flag and tcg_exit_req serve almost the same
purpose, let's make them completely the same.

The former TB_EXIT_REQUESTED and TB_EXIT_ICOUNT_EXPIRED cases are
unified, since we can distinguish them from the value of the
interrupt flag.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-22 14:56:34 +01:00
815 changed files with 22282 additions and 10678 deletions

10
.gitignore vendored
View File

@@ -99,15 +99,15 @@
/pc-bios/optionrom/kvmvapic.img
/pc-bios/s390-ccw/s390-ccw.elf
/pc-bios/s390-ccw/s390-ccw.img
/docs/qemu-ga-qapi.texi
/docs/qemu-ga-ref.html
/docs/qemu-ga-ref.info*
/docs/qemu-ga-ref.txt
/docs/qemu-qmp-qapi.texi
/docs/qemu-qmp-ref.html
/docs/qemu-qmp-ref.info*
/docs/qemu-qmp-ref.txt
docs/qemu-ga-ref.info*
docs/qemu-qmp-ref.info*
/qemu-ga-qapi.texi
/qemu-qapi.texi
/version.texi
/docs/version.texi
*.tps
.stgit-*
cscope.*

View File

@@ -5,6 +5,8 @@ env:
TARGET_LIST=arm-softmmu,arm-linux-user
- IMAGE=debian-arm64-cross
TARGET_LIST=aarch64-softmmu,aarch64-linux-user
- IMAGE=debian-s390x-cross
TARGET_LIST=s390x-softmmu,s390x-linux-user
build:
pre_ci:
- make docker-image-${IMAGE}

View File

@@ -116,3 +116,10 @@ if (a == 1) {
Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
Besides, good compilers already warn users when '==' is mis-typed as '=',
even when the constant is on the right.
7. Comment style
We use traditional C-style /* */ comments and avoid // comments.
Rationale: The // form is valid in C99, so this is purely a matter of
consistency of style. The checkpatch script will warn you about this.

View File

@@ -908,6 +908,8 @@ F: hw/acpi/*
F: hw/smbios/*
F: hw/i386/acpi-build.[hc]
F: hw/arm/virt-acpi-build.c
F: tests/bios-tables-test.c
F: tests/acpi-utils.[hc]
ppc4xx
M: Alexander Graf <agraf@suse.de>
@@ -1122,6 +1124,15 @@ F: hw/nvram/chrp_nvram.c
F: include/hw/nvram/chrp_nvram.h
F: tests/prom-env-test.c
VM Generation ID
M: Ben Warren <ben@skyportsystems.com>
S: Maintained
F: hw/acpi/vmgenid.c
F: include/hw/acpi/vmgenid.h
F: docs/specs/vmgenid.txt
F: tests/vmgenid-test.c
F: stubs/vmgenid.c
Subsystems
----------
Audio
@@ -1393,6 +1404,7 @@ F: qmp.c
F: monitor.c
F: docs/*qmp-*
F: scripts/qmp/
F: tests/qmp-test.c
T: git git://repo.or.cz/qemu/armbru.git qapi-next
Register API
@@ -1662,14 +1674,6 @@ S: Supported
F: block/ssh.c
T: git git://github.com/codyprime/qemu-kvm-jtc.git block
ARCHIPELAGO
M: Chrysostomos Nanakos <chris@include.gr>
M: Jeff Cody <jcody@redhat.com>
L: qemu-block@nongnu.org
S: Maintained
F: block/archipelago.c
T: git git://github.com/codyprime/qemu-kvm-jtc.git block
CURL
M: Jeff Cody <jcody@redhat.com>
L: qemu-block@nongnu.org

View File

@@ -50,24 +50,24 @@ endif
include $(SRC_PATH)/rules.mak
GENERATED_HEADERS = qemu-version.h config-host.h qemu-options.def
GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
GENERATED_HEADERS += qmp-introspect.h
GENERATED_SOURCES += qmp-introspect.c
GENERATED_FILES = qemu-version.h config-host.h qemu-options.def
GENERATED_FILES += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
GENERATED_FILES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
GENERATED_FILES += qmp-introspect.h
GENERATED_FILES += qmp-introspect.c
GENERATED_HEADERS += trace/generated-tcg-tracers.h
GENERATED_FILES += trace/generated-tcg-tracers.h
GENERATED_HEADERS += trace/generated-helpers-wrappers.h
GENERATED_HEADERS += trace/generated-helpers.h
GENERATED_SOURCES += trace/generated-helpers.c
GENERATED_FILES += trace/generated-helpers-wrappers.h
GENERATED_FILES += trace/generated-helpers.h
GENERATED_FILES += trace/generated-helpers.c
ifdef CONFIG_TRACE_UST
GENERATED_HEADERS += trace-ust-all.h
GENERATED_SOURCES += trace-ust-all.c
GENERATED_FILES += trace-ust-all.h
GENERATED_FILES += trace-ust-all.c
endif
GENERATED_HEADERS += module_block.h
GENERATED_FILES += module_block.h
TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
@@ -80,11 +80,15 @@ ifdef CONFIG_TRACE_UST
TRACE_HEADERS += trace-ust-root.h $(trace-events-subdirs:%=%/trace-ust.h)
endif
GENERATED_HEADERS += $(TRACE_HEADERS)
GENERATED_SOURCES += $(TRACE_SOURCES)
GENERATED_FILES += $(TRACE_HEADERS)
GENERATED_FILES += $(TRACE_SOURCES)
GENERATED_FILES += $(BUILD_DIR)/trace-events-all
trace-group-name = $(shell dirname $1 | sed -e 's/[^a-zA-Z0-9]/_/g')
tracetool-y = $(SRC_PATH)/scripts/tracetool.py
tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py")
%/trace.h: %/trace.h-timestamp
@cmp $< $@ >/dev/null 2>&1 || cp $< $@
%/trace.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
@@ -485,11 +489,10 @@ clean:
rm -f fsdev/*.pod
rm -f qemu-img-cmds.h
rm -f ui/shader/*-vert.h ui/shader/*-frag.h
@# May not be present in GENERATED_HEADERS
@# May not be present in GENERATED_FILES
rm -f trace/generated-tracers-dtrace.dtrace*
rm -f trace/generated-tracers-dtrace.h*
rm -f $(foreach f,$(GENERATED_HEADERS),$(f) $(f)-timestamp)
rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp)
rm -f $(foreach f,$(GENERATED_FILES),$(f) $(f)-timestamp)
rm -rf qapi-generated
rm -rf qga/qapi-generated
for d in $(ALL_SUBDIRS); do \
@@ -516,7 +519,7 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
rm -f docs/qemu-ga-qapi.texi docs/qemu-qmp-qapi.texi docs/version.texi
rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -593,8 +596,7 @@ endif
endif
install: all $(if $(BUILD_DOCS),install-doc) $(BUILD_DIR)/trace-events-all \
install-datadir install-localstatedir
install: all $(if $(BUILD_DOCS),install-doc) install-datadir install-localstatedir
ifneq ($(TOOLS),)
$(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
endif
@@ -663,25 +665,28 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
# documentation
MAKEINFO=makeinfo
MAKEINFOFLAGS=--no-split --number-sections
MAKEINFOFLAGS=--no-split --number-sections -I docs
TEXIFLAG=$(if $(V),,--quiet)
version.texi: $(SRC_PATH)/VERSION
docs/version.texi: $(SRC_PATH)/VERSION
$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
%.html: %.texi version.texi
%.html: %.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--html $< -o $@,"GEN","$@")
%.info: %.texi version.texi
%.info: %.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
%.txt: %.texi version.texi
%.txt: %.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--plaintext $< -o $@,"GEN","$@")
%.pdf: %.texi version.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")
%.pdf: %.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I docs $< -o $@,"GEN","$@")
docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.txt docs/qemu-ga-ref.pdf docs/qemu-ga-ref.7.pod: docs/version.texi
docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.pod: docs/version.texi
qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -695,10 +700,10 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
qemu-qapi.texi: $(qapi-modules) $(qapi-py)
docs/qemu-qmp-qapi.texi: $(qapi-modules) $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(qapi-py)
docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
@@ -719,10 +724,10 @@ qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
qemu-monitor-info.texi
docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \
docs/qemu-ga-ref.texi qemu-ga-qapi.texi
docs/qemu-ga-ref.texi docs/qemu-ga-qapi.texi
docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \
docs/qemu-qmp-ref.texi qemu-qapi.texi
docs/qemu-qmp-ref.texi docs/qemu-qmp-qapi.texi
ifdef CONFIG_WIN32
@@ -784,7 +789,7 @@ endif # CONFIG_WIN
# Add a dependency on the generated files, so that they are always
# rebuilt before other object files
ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
Makefile: $(GENERATED_HEADERS)
Makefile: $(GENERATED_FILES)
endif
.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \

View File

@@ -162,7 +162,7 @@ else
obj-y += hw/$(TARGET_BASE_ARCH)/
endif
GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h
GENERATED_FILES += hmp-commands.h hmp-commands-info.h
endif # CONFIG_SOFTMMU
@@ -238,5 +238,5 @@ ifdef CONFIG_TRACE_SYSTEMTAP
$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
endif
GENERATED_HEADERS += config-target.h
Makefile: $(GENERATED_HEADERS)
GENERATED_FILES += config-target.h
Makefile: $(GENERATED_FILES)

View File

@@ -1 +1 @@
2.8.50
2.8.90

View File

@@ -28,6 +28,7 @@
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "qemu/cutils.h"
#include "sysemu/replay.h"
#define AUDIO_CAP "audio"
#include "audio_int.h"
@@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void)
static void audio_reset_timer (AudioState *s)
{
if (audio_is_timer_needed ()) {
timer_mod (s->ts,
timer_mod_anticipate_ns(s->ts,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
}
else {
@@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s)
prev_rpos = hw->rpos;
played = hw->pcm_ops->run_out (hw, live);
replay_audio_out(&played);
if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
hw->rpos, hw->samples, played);
@@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s)
while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
SWVoiceIn *sw;
int captured, min;
int captured = 0, min;
captured = hw->pcm_ops->run_in (hw);
if (replay_mode != REPLAY_MODE_PLAY) {
captured = hw->pcm_ops->run_in(hw);
}
replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
min = audio_pcm_hw_find_min_in (hw);
hw->total_samples_captured += captured - min;

View File

@@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
bool audio_is_cleaning_up(void);
void audio_cleanup(void);
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right);
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right);
#endif /* QEMU_AUDIO_H */

View File

@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "audio.h"
#define AUDIO_CAP "mixeng"
@@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = {
}
};
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
*left = sample->l;
*right = sample->r;
#endif
}
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
sample->l = left;
sample->r = right;
#endif
}
/*
* August 21, 1998
* Copyright 1998 Fabrice Bellard.

View File

@@ -38,10 +38,14 @@
#define AUDIO_CAP "sdl"
#include "audio_int.h"
#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
typedef struct SDLVoiceOut {
HWVoiceOut hw;
int live;
#if USE_SEMAPHORE
int rpos;
#endif
int decr;
} SDLVoiceOut;
@@ -53,8 +57,10 @@ static struct {
static struct SDLAudioState {
int exit;
#if USE_SEMAPHORE
SDL_mutex *mutex;
SDL_sem *sem;
#endif
int initialized;
bool driver_created;
} glob_sdl;
@@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)
static int sdl_lock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_LockMutex (s->mutex)) {
sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_LockAudio();
#endif
return 0;
}
static int sdl_unlock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_UnlockMutex (s->mutex)) {
sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_UnlockAudio();
#endif
return 0;
}
static int sdl_post (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_SemPost (s->sem)) {
sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
return -1;
}
#endif
return 0;
}
#if USE_SEMAPHORE
static int sdl_wait (SDLAudioState *s, const char *forfn)
{
if (SDL_SemWait (s->sem)) {
@@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
}
return 0;
}
#endif
static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
{
@@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
int to_mix, decr;
/* dolog ("in callback samples=%d\n", samples); */
#if USE_SEMAPHORE
sdl_wait (s, "sdl_callback");
if (s->exit) {
return;
@@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
if (!sdl->live) {
goto again;
}
#else
if (s->exit || !sdl->live) {
break;
}
#endif
/* dolog ("in callback live=%d\n", live); */
to_mix = audio_MIN (samples, sdl->live);
@@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
/* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
hw->clip (buf, src, chunk);
#if USE_SEMAPHORE
sdl->rpos = (sdl->rpos + chunk) % hw->samples;
#else
hw->rpos = (hw->rpos + chunk) % hw->samples;
#endif
to_mix -= chunk;
buf += chunk << hw->info.shift;
}
@@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
sdl->live -= decr;
sdl->decr += decr;
#if USE_SEMAPHORE
again:
if (sdl_unlock (s, "sdl_callback")) {
return;
}
#endif
}
/* dolog ("done len=%d\n", len); */
#if (SDL_MAJOR_VERSION >= 2)
/* SDL2 does not clear the remaining buffer for us, so do it on our own */
if (samples) {
memset(buf, 0, samples << hw->info.shift);
}
#endif
}
static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
decr = audio_MIN (sdl->decr, live);
sdl->decr -= decr;
#if USE_SEMAPHORE
sdl->live = live - decr;
hw->rpos = sdl->rpos;
#else
sdl->live = live;
#endif
if (sdl->live > 0) {
sdl_unlock_and_post (s, "sdl_run_out");
@@ -405,6 +449,7 @@ static void *sdl_audio_init (void)
return NULL;
}
#if USE_SEMAPHORE
s->mutex = SDL_CreateMutex ();
if (!s->mutex) {
sdl_logerr ("Failed to create SDL mutex\n");
@@ -419,6 +464,7 @@ static void *sdl_audio_init (void)
SDL_QuitSubSystem (SDL_INIT_AUDIO);
return NULL;
}
#endif
s->driver_created = true;
return s;
@@ -428,8 +474,10 @@ static void sdl_audio_fini (void *opaque)
{
SDLAudioState *s = opaque;
sdl_close (s);
#if USE_SEMAPHORE
SDL_DestroySemaphore (s->sem);
SDL_DestroyMutex (s->mutex);
#endif
SDL_QuitSubSystem (SDL_INIT_AUDIO);
s->driver_created = false;
}

View File

@@ -224,7 +224,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
os_mem_prealloc(fd, ptr, sz, &local_err);
os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -328,7 +328,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
*/
if (backend->prealloc) {
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
&local_err);
smp_cpus, &local_err);
if (local_err) {
goto out;
}

705
block.c
View File

@@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
return 0;
}
static char *bdrv_child_get_parent_desc(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
return g_strdup(bdrv_get_device_or_node_name(parent));
}
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
@@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_file = {
.get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_format = {
.get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_fmt_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
};
static void bdrv_backing_attach(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
BlockDriverState *backing_hd = c->bs;
assert(!parent->backing_blocker);
error_setg(&parent->backing_blocker,
"node is used as backing hd of '%s'",
bdrv_get_device_or_node_name(parent));
parent->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(parent->backing_file, sizeof(parent->backing_file),
backing_hd->filename);
pstrcpy(parent->backing_format, sizeof(parent->backing_format),
backing_hd->drv ? backing_hd->drv->format_name : "");
bdrv_op_block_all(backing_hd, parent->backing_blocker);
/* Otherwise we won't be able to commit or stream */
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
parent->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
parent->backing_blocker);
/*
* We do backup in 3 ways:
* 1. drive backup
* The target bs is new opened, and the source is top BDS
* 2. blockdev backup
* Both the source and the target are top BDSes.
* 3. internal backup(used for block replication)
* Both the source and the target are backing file
*
* In case 1 and 2, neither the source nor the target is the backing file.
* In case 3, we will block the top BDS, so there is only one block job
* for the top BDS and its backing chain.
*/
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
parent->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
parent->backing_blocker);
}
static void bdrv_backing_detach(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
assert(parent->backing_blocker);
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
error_free(parent->backing_blocker);
parent->backing_blocker = NULL;
}
/*
* Returns the options and flags that bs->backing should get, based on the
* given options and flags for the parent BDS
@@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
*child_flags = flags;
}
static const BdrvChildRole child_backing = {
const BdrvChildRole child_backing = {
.get_parent_desc = bdrv_child_get_parent_desc,
.attach = bdrv_backing_attach,
.detach = bdrv_backing_detach,
.inherit_options = bdrv_backing_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -1200,9 +1262,14 @@ static QDict *parse_json_filename(const char *filename, Error **errp)
ret = strstart(filename, "json:", &filename);
assert(ret);
options_obj = qobject_from_json(filename);
options_obj = qobject_from_json(filename, errp);
if (!options_obj) {
error_setg(errp, "Could not parse the JSON options");
/* Work around qobject_from_json() lossage TODO fix that */
if (errp && !*errp) {
error_setg(errp, "Could not parse the JSON options");
return NULL;
}
error_prepend(errp, "Could not parse the JSON options: ");
return NULL;
}
@@ -1326,7 +1393,342 @@ static int bdrv_fill_options(QDict **options, const char *filename,
return 0;
}
static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
/*
* Check whether permissions on this node can be changed in a way that
* @cumulative_perms and @cumulative_shared_perms are the new cumulative
* permissions of all its parents. This involves checking whether all necessary
* permission changes to child nodes can be performed.
*
* A call to this function must always be followed by a call to bdrv_set_perm()
* or bdrv_abort_perm_update().
*/
static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
uint64_t cumulative_shared_perms,
GSList *ignore_children, Error **errp)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
int ret;
/* Write permissions never work with read-only images */
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
bdrv_is_read_only(bs))
{
error_setg(errp, "Block node is read-only");
return -EPERM;
}
/* Check this node */
if (!drv) {
return 0;
}
if (drv->bdrv_check_perm) {
return drv->bdrv_check_perm(bs, cumulative_perms,
cumulative_shared_perms, errp);
}
/* Drivers that never have children can omit .bdrv_child_perm() */
if (!drv->bdrv_child_perm) {
assert(QLIST_EMPTY(&bs->children));
return 0;
}
/* Check all children */
QLIST_FOREACH(c, &bs->children, next) {
uint64_t cur_perm, cur_shared;
drv->bdrv_child_perm(bs, c, c->role,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children,
errp);
if (ret < 0) {
return ret;
}
}
return 0;
}
/*
* Notifies drivers that after a previous bdrv_check_perm() call, the
* permission update is not performed and any preparations made for it (e.g.
* taken file locks) need to be undone.
*
* This function recursively notifies all child nodes.
*/
static void bdrv_abort_perm_update(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
if (!drv) {
return;
}
if (drv->bdrv_abort_perm_update) {
drv->bdrv_abort_perm_update(bs);
}
QLIST_FOREACH(c, &bs->children, next) {
bdrv_child_abort_perm_update(c);
}
}
static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
uint64_t cumulative_shared_perms)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
if (!drv) {
return;
}
/* Update this node */
if (drv->bdrv_set_perm) {
drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
}
/* Drivers that never have children can omit .bdrv_child_perm() */
if (!drv->bdrv_child_perm) {
assert(QLIST_EMPTY(&bs->children));
return;
}
/* Update all children */
QLIST_FOREACH(c, &bs->children, next) {
uint64_t cur_perm, cur_shared;
drv->bdrv_child_perm(bs, c, c->role,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
bdrv_child_set_perm(c, cur_perm, cur_shared);
}
}
static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
uint64_t *shared_perm)
{
BdrvChild *c;
uint64_t cumulative_perms = 0;
uint64_t cumulative_shared_perms = BLK_PERM_ALL;
QLIST_FOREACH(c, &bs->parents, next_parent) {
cumulative_perms |= c->perm;
cumulative_shared_perms &= c->shared_perm;
}
*perm = cumulative_perms;
*shared_perm = cumulative_shared_perms;
}
static char *bdrv_child_user_desc(BdrvChild *c)
{
if (c->role->get_parent_desc) {
return c->role->get_parent_desc(c);
}
return g_strdup("another user");
}
static char *bdrv_perm_names(uint64_t perm)
{
struct perm_name {
uint64_t perm;
const char *name;
} permissions[] = {
{ BLK_PERM_CONSISTENT_READ, "consistent read" },
{ BLK_PERM_WRITE, "write" },
{ BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
{ BLK_PERM_RESIZE, "resize" },
{ BLK_PERM_GRAPH_MOD, "change children" },
{ 0, NULL }
};
char *result = g_strdup("");
struct perm_name *p;
for (p = permissions; p->name; p++) {
if (perm & p->perm) {
char *old = result;
result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name);
g_free(old);
}
}
return result;
}
/*
* Checks whether a new reference to @bs can be added if the new user requires
* @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
* set, the BdrvChild objects in this list are ignored in the calculations;
* this allows checking permission updates for an existing reference.
*
* Needs to be followed by a call to either bdrv_set_perm() or
* bdrv_abort_perm_update(). */
static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
uint64_t new_shared_perm,
GSList *ignore_children, Error **errp)
{
BdrvChild *c;
uint64_t cumulative_perms = new_used_perm;
uint64_t cumulative_shared_perms = new_shared_perm;
/* There is no reason why anyone couldn't tolerate write_unchanged */
assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (g_slist_find(ignore_children, c)) {
continue;
}
if ((new_used_perm & c->shared_perm) != new_used_perm) {
char *user = bdrv_child_user_desc(c);
char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
error_setg(errp, "Conflicts with use by %s as '%s', which does not "
"allow '%s' on %s",
user, c->name, perm_names, bdrv_get_node_name(c->bs));
g_free(user);
g_free(perm_names);
return -EPERM;
}
if ((c->perm & new_shared_perm) != c->perm) {
char *user = bdrv_child_user_desc(c);
char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
error_setg(errp, "Conflicts with use by %s as '%s', which uses "
"'%s' on %s",
user, c->name, perm_names, bdrv_get_node_name(c->bs));
g_free(user);
g_free(perm_names);
return -EPERM;
}
cumulative_perms |= c->perm;
cumulative_shared_perms &= c->shared_perm;
}
return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms,
ignore_children, errp);
}
/* Needs to be followed by a call to either bdrv_child_set_perm() or
* bdrv_child_abort_perm_update(). */
int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
GSList *ignore_children, Error **errp)
{
int ret;
ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
ret = bdrv_check_update_perm(c->bs, perm, shared, ignore_children, errp);
g_slist_free(ignore_children);
return ret;
}
void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
{
uint64_t cumulative_perms, cumulative_shared_perms;
c->perm = perm;
c->shared_perm = shared;
bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
&cumulative_shared_perms);
bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
}
void bdrv_child_abort_perm_update(BdrvChild *c)
{
bdrv_abort_perm_update(c->bs);
}
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
Error **errp)
{
int ret;
ret = bdrv_child_check_perm(c, perm, shared, NULL, errp);
if (ret < 0) {
bdrv_child_abort_perm_update(c);
return ret;
}
bdrv_child_set_perm(c, perm, shared);
return 0;
}
#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
| BLK_PERM_WRITE \
| BLK_PERM_WRITE_UNCHANGED \
| BLK_PERM_RESIZE)
#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
if (c == NULL) {
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
return;
}
*nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
(c->perm & DEFAULT_PERM_UNCHANGED);
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
(c->shared_perm & DEFAULT_PERM_UNCHANGED);
}
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
bool backing = (role == &child_backing);
assert(role == &child_backing || role == &child_file);
if (!backing) {
/* Apart from the modifications below, the same permissions are
* forwarded and left alone as for filters */
bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
/* Format drivers may touch metadata even if the guest doesn't write */
if (!bdrv_is_read_only(bs)) {
perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
}
/* bs->file always needs to be consistent because of the metadata. We
* can never allow other users to resize or write to it. */
perm |= BLK_PERM_CONSISTENT_READ;
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
} else {
/* We want consistent read from backing files if the parent needs it.
* No other operations are performed on backing files. */
perm &= BLK_PERM_CONSISTENT_READ;
/* If the parent can deal with changing data, we're okay with a
* writable and resizable backing file. */
/* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */
if (shared & BLK_PERM_WRITE) {
shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
} else {
shared = 0;
}
shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
BLK_PERM_WRITE_UNCHANGED;
}
*nperm = perm;
*nshared = shared;
}
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs)
{
BlockDriverState *old_bs = child->bs;
@@ -1334,6 +1736,9 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
if (old_bs->quiesce_counter && child->role->drained_end) {
child->role->drained_end(child);
}
if (child->role->detach) {
child->role->detach(child);
}
QLIST_REMOVE(child, next_parent);
}
@@ -1344,22 +1749,72 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
if (new_bs->quiesce_counter && child->role->drained_begin) {
child->role->drained_begin(child);
}
if (child->role->attach) {
child->role->attach(child);
}
}
}
/*
* Updates @child to change its reference to point to @new_bs, including
* checking and applying the necessary permisson updates both to the old node
* and to @new_bs.
*
* NULL is passed as @new_bs for removing the reference before freeing @child.
*
* If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
* function uses bdrv_set_perm() to update the permissions according to the new
* reference that @new_bs gets.
*/
static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
{
BlockDriverState *old_bs = child->bs;
uint64_t perm, shared_perm;
if (old_bs) {
/* Update permissions for old node. This is guaranteed to succeed
* because we're just taking a parent away, so we're loosening
* restrictions. */
bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
bdrv_check_perm(old_bs, perm, shared_perm, NULL, &error_abort);
bdrv_set_perm(old_bs, perm, shared_perm);
}
bdrv_replace_child_noperm(child, new_bs);
if (new_bs) {
bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
bdrv_set_perm(new_bs, perm, shared_perm);
}
}
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
void *opaque)
uint64_t perm, uint64_t shared_perm,
void *opaque, Error **errp)
{
BdrvChild *child = g_new(BdrvChild, 1);
BdrvChild *child;
int ret;
ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp);
if (ret < 0) {
bdrv_abort_perm_update(child_bs);
return NULL;
}
child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
.bs = NULL,
.name = g_strdup(child_name),
.role = child_role,
.opaque = opaque,
.bs = NULL,
.name = g_strdup(child_name),
.role = child_role,
.perm = perm,
.shared_perm = shared_perm,
.opaque = opaque,
};
/* This performs the matching bdrv_set_perm() for the above check. */
bdrv_replace_child(child, child_bs);
return child;
@@ -1368,10 +1823,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role)
const BdrvChildRole *child_role,
Error **errp)
{
BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
parent_bs);
BdrvChild *child;
uint64_t perm, shared_perm;
bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
assert(parent_bs->drv);
parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
perm, shared_perm, &perm, &shared_perm);
child = bdrv_root_attach_child(child_bs, child_name, child_role,
perm, shared_perm, parent_bs, errp);
if (child == NULL) {
return NULL;
}
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
return child;
}
@@ -1447,57 +1916,30 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
* Sets the backing file link of a BDS. A new reference is created; callers
* which don't need their own reference any more must call bdrv_unref().
*/
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp)
{
if (backing_hd) {
bdrv_ref(backing_hd);
}
if (bs->backing) {
assert(bs->backing_blocker);
bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
bdrv_unref_child(bs, bs->backing);
} else if (backing_hd) {
error_setg(&bs->backing_blocker,
"node is used as backing hd of '%s'",
bdrv_get_device_or_node_name(bs));
}
if (!backing_hd) {
error_free(bs->backing_blocker);
bs->backing_blocker = NULL;
bs->backing = NULL;
goto out;
}
bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
bs->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
pstrcpy(bs->backing_format, sizeof(bs->backing_format),
backing_hd->drv ? backing_hd->drv->format_name : "");
bdrv_op_block_all(backing_hd, bs->backing_blocker);
/* Otherwise we won't be able to commit or stream */
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
bs->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
bs->backing_blocker);
/*
* We do backup in 3 ways:
* 1. drive backup
* The target bs is new opened, and the source is top BDS
* 2. blockdev backup
* Both the source and the target are top BDSes.
* 3. internal backup(used for block replication)
* Both the source and the target are backing file
*
* In case 1 and 2, neither the source nor the target is the backing file.
* In case 3, we will block the top BDS, so there is only one block job
* for the top BDS and its backing chain.
*/
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
bs->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
bs->backing_blocker);
bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing,
errp);
if (!bs->backing) {
bdrv_unref(backing_hd);
}
bdrv_refresh_filename(bs);
out:
bdrv_refresh_limits(bs, NULL);
}
@@ -1580,8 +2022,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
/* Hook up the backing file link; drop our reference, bs owns the
* backing_hd reference now */
bdrv_set_backing_hd(bs, backing_hd);
bdrv_set_backing_hd(bs, backing_hd, &local_err);
bdrv_unref(backing_hd);
if (local_err) {
ret = -EINVAL;
goto free_exit;
}
qdict_del(parent_options, bdref_key);
@@ -1648,6 +2094,7 @@ BdrvChild *bdrv_open_child(const char *filename,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BdrvChild *c;
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -1656,7 +2103,13 @@ BdrvChild *bdrv_open_child(const char *filename,
return NULL;
}
return bdrv_attach_child(parent, bs, bdref_key, child_role);
c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
if (!c) {
bdrv_unref(bs);
return NULL;
}
return c;
}
static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
@@ -1669,6 +2122,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
int64_t total_size;
QemuOpts *opts = NULL;
BlockDriverState *bs_snapshot;
Error *local_err = NULL;
int ret;
/* if snapshot, we create a temporary backing file and open it
@@ -1718,7 +2172,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
* call bdrv_unref() on it), so in order to be able to return one, we have
* to increase bs_snapshot's refcount here */
bdrv_ref(bs_snapshot);
bdrv_append(bs_snapshot, bs);
bdrv_append(bs_snapshot, bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
}
g_free(tmp_filename);
return bs_snapshot;
@@ -1862,9 +2321,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto fail;
}
if (file_bs != NULL) {
file = blk_new();
blk_insert_bs(file, file_bs);
file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
blk_insert_bs(file, file_bs, &local_err);
bdrv_unref(file_bs);
if (local_err) {
goto fail;
}
qdict_put(options, "file",
qstring_from_str(bdrv_get_node_name(file_bs)));
@@ -2405,7 +2867,7 @@ static void bdrv_close(BlockDriverState *bs)
bs->drv->bdrv_close(bs);
bs->drv = NULL;
bdrv_set_backing_hd(bs, NULL);
bdrv_set_backing_hd(bs, NULL, &error_abort);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
@@ -2459,31 +2921,82 @@ void bdrv_close_all(void)
assert(QTAILQ_EMPTY(&all_bdrv_states));
}
static void change_parent_backing_link(BlockDriverState *from,
BlockDriverState *to)
static bool should_update_child(BdrvChild *c, BlockDriverState *to)
{
BdrvChild *c, *next, *to_c;
BdrvChild *to_c;
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
if (c->role == &child_backing) {
/* @from is generally not allowed to be a backing file, except for
* when @to is the overlay. In that case, @from may not be replaced
* by @to as @to's backing node. */
QLIST_FOREACH(to_c, &to->children, next) {
if (to_c == c) {
break;
}
}
if (to_c) {
continue;
if (c->role->stay_at_node) {
return false;
}
if (c->role == &child_backing) {
/* If @from is a backing file of @to, ignore the child to avoid
* creating a loop. We only want to change the pointer of other
* parents. */
QLIST_FOREACH(to_c, &to->children, next) {
if (to_c == c) {
break;
}
}
if (to_c) {
return false;
}
}
return true;
}
void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp)
{
BdrvChild *c, *next;
GSList *list = NULL, *p;
uint64_t old_perm, old_shared;
uint64_t perm = 0, shared = BLK_PERM_ALL;
int ret;
assert(!atomic_read(&from->in_flight));
assert(!atomic_read(&to->in_flight));
/* Make sure that @from doesn't go away until we have successfully attached
* all of its parents to @to. */
bdrv_ref(from);
/* Put all parents into @list and calculate their cumulative permissions */
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
if (!should_update_child(c, to)) {
continue;
}
list = g_slist_prepend(list, c);
perm |= c->perm;
shared &= c->shared_perm;
}
/* Check whether the required permissions can be granted on @to, ignoring
* all BdrvChild in @list so that they can't block themselves. */
ret = bdrv_check_update_perm(to, perm, shared, list, errp);
if (ret < 0) {
bdrv_abort_perm_update(to);
goto out;
}
/* Now actually perform the change. We performed the permission check for
* all elements of @list at once, so set the permissions all at once at the
* very end. */
for (p = list; p != NULL; p = p->next) {
c = p->data;
assert(c->role != &child_backing);
bdrv_ref(to);
bdrv_replace_child(c, to);
bdrv_replace_child_noperm(c, to);
bdrv_unref(from);
}
bdrv_get_cumulative_perm(to, &old_perm, &old_shared);
bdrv_set_perm(to, old_perm | perm, old_shared | shared);
out:
g_slist_free(list);
bdrv_unref(from);
}
/*
@@ -2502,34 +3015,30 @@ static void change_parent_backing_link(BlockDriverState *from,
* parents of bs_top after bdrv_append() returns. If the caller needs to keep a
* reference of its own, it must call bdrv_ref().
*/
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
{
assert(!bdrv_requests_pending(bs_top));
assert(!bdrv_requests_pending(bs_new));
Error *local_err = NULL;
bdrv_ref(bs_top);
bdrv_set_backing_hd(bs_new, bs_top, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto out;
}
change_parent_backing_link(bs_top, bs_new);
bdrv_set_backing_hd(bs_new, bs_top);
bdrv_unref(bs_top);
bdrv_replace_node(bs_top, bs_new, &local_err);
if (local_err) {
error_propagate(errp, local_err);
bdrv_set_backing_hd(bs_new, NULL, &error_abort);
goto out;
}
/* bs_new is now referenced by its new parents, we don't need the
* additional reference any more. */
out:
bdrv_unref(bs_new);
}
void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
{
assert(!bdrv_requests_pending(old));
assert(!bdrv_requests_pending(new));
bdrv_ref(old);
change_parent_backing_link(old, new);
bdrv_unref(old);
}
static void bdrv_delete(BlockDriverState *bs)
{
assert(!bs->job);
@@ -2658,6 +3167,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base, const char *backing_file_str)
{
BlockDriverState *new_top_bs = NULL;
Error *local_err = NULL;
int ret = -EIO;
if (!top->drv || !base->drv) {
@@ -2690,7 +3200,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
if (ret) {
goto exit;
}
bdrv_set_backing_hd(new_top_bs, base);
bdrv_set_backing_hd(new_top_bs, base, &local_err);
if (local_err) {
ret = -EPERM;
error_report_err(local_err);
goto exit;
}
ret = 0;
exit:
@@ -2705,6 +3221,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int ret;
assert(child->perm & BLK_PERM_RESIZE);
if (!drv)
return -ENOMEDIUM;
if (!drv->bdrv_truncate)

View File

@@ -19,7 +19,6 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
@@ -41,7 +40,6 @@ gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
archipelago.o-libs := $(ARCHIPELAGO_LIBS)
block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
dmg-bz2.o-libs := $(BZIP2_LIBS)
qcow.o-libs := -lz

File diff suppressed because it is too large Load Diff

View File

@@ -24,6 +24,7 @@
#include "qemu/cutils.h"
#include "sysemu/block-backend.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
#define SLICE_TIME 100000000ULL /* ns */
@@ -467,13 +468,14 @@ static void coroutine_fn backup_run(void *opaque)
/* Both FULL and TOP SYNC_MODE's require copying.. */
for (; start < end; start++) {
bool error_is_read;
int alloced = 0;
if (yield_and_check(job)) {
break;
}
if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
int i, n;
int alloced = 0;
/* Check to see if these blocks are already in the
* backing file. */
@@ -491,7 +493,7 @@ static void coroutine_fn backup_run(void *opaque)
sectors_per_cluster - i, &n);
i += n;
if (alloced == 1 || n == 0) {
if (alloced || n == 0) {
break;
}
}
@@ -503,8 +505,13 @@ static void coroutine_fn backup_run(void *opaque)
}
}
/* FULL sync mode we copy the whole drive. */
ret = backup_do_cow(job, start * sectors_per_cluster,
sectors_per_cluster, &error_is_read, false);
if (alloced < 0) {
ret = alloced;
} else {
ret = backup_do_cow(job, start * sectors_per_cluster,
sectors_per_cluster, &error_is_read,
false);
}
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
@@ -618,14 +625,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
job = block_job_create(job_id, &backup_job_driver, bs, speed,
creation_flags, cb, opaque, errp);
/* job->common.len is fixed, so we can't allow resize */
job = block_job_create(job_id, &backup_job_driver, bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
speed, creation_flags, cb, opaque, errp);
if (!job) {
goto error;
}
job->target = blk_new();
blk_insert_bs(job->target, target);
/* The target must match the source in size, so no resize here either */
job->target = blk_new(BLK_PERM_WRITE,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
ret = blk_insert_bs(job->target, target, errp);
if (ret < 0) {
goto error;
}
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
@@ -638,7 +655,16 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
* backup cluster size is smaller than the target cluster size. Even for
* targets with a backing file, try to avoid COW if possible. */
ret = bdrv_get_info(target, &bdi);
if (ret < 0 && !target->backing) {
if (ret == -ENOTSUP && !target->backing) {
/* Cluster size is not defined */
error_report("WARNING: The target block device doesn't provide "
"information about the block size and it doesn't have a "
"backing file. The default block size of %u bytes is "
"used. If the actual block size of the target exceeds "
"this default, the backup may be unusable",
BACKUP_CLUSTER_SIZE_DEFAULT);
job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
} else if (ret < 0 && !target->backing) {
error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, "
"which has no backing file");
@@ -652,7 +678,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
block_job_add_bdrv(&job->common, target);
/* Required permissions are already taken with target's blk_new() */
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
job->common.len = len;
block_job_txn_add_job(txn, &job->common);

View File

@@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_reopen_prepare = blkdebug_reopen_prepare,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,

View File

@@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_file_open = blkreplay_open,
.bdrv_close = blkreplay_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkreplay_getlength,
.bdrv_co_preadv = blkreplay_co_preadv,

View File

@@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = {
.bdrv_parse_filename = blkverify_parse_filename,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkverify_getlength,
.bdrv_refresh_filename = blkverify_refresh_filename,

View File

@@ -59,6 +59,9 @@ struct BlockBackend {
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
uint64_t perm;
uint64_t shared_perm;
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
@@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
static char *blk_root_get_parent_desc(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
char *dev_id;
if (blk->name) {
return g_strdup(blk->name);
}
dev_id = blk_get_attached_dev_id(blk);
if (*dev_id) {
return dev_id;
} else {
/* TODO Callback into the BB owner for something more detailed */
g_free(dev_id);
return g_strdup("a block device");
}
}
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
@@ -110,6 +133,7 @@ static const BdrvChildRole child_root = {
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
.get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
@@ -117,15 +141,23 @@ static const BdrvChildRole child_root = {
/*
* Create a new BlockBackend with a reference count of one.
* Store an error through @errp on failure, unless it's null.
*
* @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
* to request for a block driver node that is attached to this BlockBackend.
* @shared_perm is a bitmask which describes which permissions may be granted
* to other users of the attached node.
* Both sets of permissions can be changed later using blk_set_perm().
*
* Return the new BlockBackend on success, null on failure.
*/
BlockBackend *blk_new(void)
BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
blk->perm = perm;
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
@@ -155,15 +187,38 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
{
BlockBackend *blk;
BlockDriverState *bs;
uint64_t perm;
blk = blk_new();
/* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a concern because the BDS stays private, so we
* just request permission according to the flags.
*
* The exceptions are xen_disk and blockdev_init(); in these cases, the
* caller of blk_new_open() doesn't make use of the permissions, but they
* shouldn't hurt either. We can still share everything here because the
* guest devices will add their own blockers if they can't share. */
perm = BLK_PERM_CONSISTENT_READ;
if (flags & BDRV_O_RDWR) {
perm |= BLK_PERM_WRITE;
}
if (flags & BDRV_O_RESIZE) {
perm |= BLK_PERM_RESIZE;
}
blk = blk_new(perm, BLK_PERM_ALL);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
return NULL;
}
blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
perm, BLK_PERM_ALL, blk, errp);
if (!blk->root) {
bdrv_unref(bs);
blk_unref(blk);
return NULL;
}
return blk;
}
@@ -495,16 +550,49 @@ void blk_remove_bs(BlockBackend *blk)
/*
* Associates a new BlockDriverState with @blk.
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
blk->perm, blk->shared_perm, blk, errp);
if (blk->root == NULL) {
return -EPERM;
}
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
return 0;
}
/*
* Sets the permission bitmasks that the user of the BlockBackend needs.
*/
int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp)
{
int ret;
if (blk->root) {
ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
if (ret < 0) {
return ret;
}
}
blk->perm = perm;
blk->shared_perm = shared_perm;
return 0;
}
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
*perm = blk->perm;
*shared_perm = blk->shared_perm;
}
static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -553,6 +641,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
blk->guest_block_size = 512;
blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
@@ -620,19 +709,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
/*
* Notify @blk's attached device model of media change.
* If @load is true, notify of media load.
* Else, notify of media eject.
*
* If @load is true, notify of media load. This action can fail, meaning that
* the medium cannot be loaded. @errp is set then.
*
* If @load is false, notify of media eject. This can never fail.
*
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
Error *local_err = NULL;
assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
blk->dev_ops->change_media_cb(blk->dev_opaque, load);
blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
if (local_err) {
assert(load == true);
error_propagate(errp, local_err);
return;
}
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
@@ -646,7 +745,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
static void blk_root_change_media(BdrvChild *child, bool load)
{
blk_dev_change_media_cb(child->opaque, load);
blk_dev_change_media_cb(child->opaque, load, NULL);
}
/*

View File

@@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = bochs_refresh_limits,
.bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,

View File

@@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = cloop_refresh_limits,
.bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,

View File

@@ -13,6 +13,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob_int.h"
@@ -36,6 +37,7 @@ typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *active;
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockdevOnError on_error;
@@ -83,12 +85,23 @@ static void commit_complete(BlockJob *job, void *opaque)
BlockDriverState *active = s->active;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
int ret = data->ret;
bool remove_commit_top_bs = false;
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
blk_unref(s->base);
if (!block_job_is_cancelled(&s->common) && ret == 0) {
/* success */
ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
s->backing_file_str);
} else if (overlay_bs) {
/* XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */
remove_commit_top_bs = true;
}
/* restore base open flags here if appropriate (e.g., change the base back
@@ -102,9 +115,15 @@ static void commit_complete(BlockJob *job, void *opaque)
}
g_free(s->backing_file_str);
blk_unref(s->top);
blk_unref(s->base);
block_job_completed(&s->common, ret);
g_free(data);
/* If bdrv_drop_intermediate() didn't already do that, remove the commit
* filter driver from the backing chain. Do this as the final step so that
* the 'consistent read' permission can be granted. */
if (remove_commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
}
static void coroutine_fn commit_run(void *opaque)
@@ -208,10 +227,57 @@ static const BlockJobDriver commit_job_driver = {
.start = commit_run,
};
static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
static int64_t coroutine_fn bdrv_commit_top_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file)
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts)
{
bdrv_refresh_filename(bs->backing->bs);
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
bs->backing->bs->filename);
}
static void bdrv_commit_top_close(BlockDriverState *bs)
{
}
static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
*nperm = 0;
*nshared = BLK_PERM_ALL;
}
/* Dummy node that provides consistent read to its users without requiring it
* from its backing file and that allows writes on the backing file chain. */
static BlockDriver bdrv_commit_top = {
.format_name = "commit_top",
.bdrv_co_preadv = bdrv_commit_top_preadv,
.bdrv_co_get_block_status = bdrv_commit_top_get_block_status,
.bdrv_refresh_filename = bdrv_commit_top_refresh_filename,
.bdrv_close = bdrv_commit_top_close,
.bdrv_child_perm = bdrv_commit_top_child_perm,
};
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
Error **errp)
const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
@@ -219,7 +285,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *overlay_bs;
BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
int ret;
assert(top != bs);
if (top == base) {
@@ -234,8 +302,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
}
s = block_job_create(job_id, &commit_job_driver, bs, speed,
BLOCK_JOB_DEFAULT, NULL, NULL, errp);
s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
@@ -256,30 +324,82 @@ void commit_start(const char *job_id, BlockDriverState *bs,
bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
block_job_unref(&s->common);
return;
goto fail;
}
}
/* Insert commit_top block node above top, so we can block consistent read
* on the backing chain below it */
commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
errp);
if (commit_top_bs == NULL) {
goto fail;
}
bdrv_set_backing_hd(commit_top_bs, top, &local_err);
if (local_err) {
bdrv_unref(commit_top_bs);
commit_top_bs = NULL;
error_propagate(errp, local_err);
goto fail;
}
bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err);
if (local_err) {
bdrv_unref(commit_top_bs);
commit_top_bs = NULL;
error_propagate(errp, local_err);
goto fail;
}
s->commit_top_bs = commit_top_bs;
bdrv_unref(commit_top_bs);
/* Block all nodes between top and base, because they will
* disappear from the chain after this operation. */
assert(bdrv_chain_contains(top, base));
for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
for (iter = top; iter != base; iter = backing_bs(iter)) {
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
* at s->base (if writes are blocked for a node, they are also blocked
* for its backing file). The other options would be a second filter
* driver above s->base. */
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
errp);
if (ret < 0) {
goto fail;
}
}
ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
}
/* overlay_bs must be blocked because it needs to be modified to
* update the backing image string, but if it's the root node then
* don't block it again */
if (bs != overlay_bs) {
block_job_add_bdrv(&s->common, overlay_bs);
* update the backing image string. */
ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
}
s->base = blk_new();
blk_insert_bs(s->base, base);
s->base = blk_new(BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_RESIZE,
BLK_PERM_CONSISTENT_READ
| BLK_PERM_GRAPH_MOD
| BLK_PERM_WRITE_UNCHANGED);
ret = blk_insert_bs(s->base, base, errp);
if (ret < 0) {
goto fail;
}
s->top = blk_new();
blk_insert_bs(s->top, top);
/* Required permissions are already taken with block_job_add_bdrv() */
s->top = blk_new(0, BLK_PERM_ALL);
ret = blk_insert_bs(s->top, top, errp);
if (ret < 0) {
goto fail;
}
s->active = bs;
@@ -292,6 +412,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
trace_commit_start(bs, base, top, s);
block_job_start(&s->common);
return;
fail:
if (s->base) {
blk_unref(s->base);
}
if (s->top) {
blk_unref(s->top);
}
if (commit_top_bs) {
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
block_job_unref(&s->common);
}
@@ -301,11 +434,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int bdrv_commit(BlockDriverState *bs)
{
BlockBackend *src, *backing;
BlockDriverState *backing_file_bs = NULL;
BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int ret = 0;
uint8_t *buf = NULL;
Error *local_err = NULL;
if (!drv)
return -ENOMEDIUM;
@@ -328,11 +464,33 @@ int bdrv_commit(BlockDriverState *bs)
}
}
src = blk_new();
blk_insert_bs(src, bs);
src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
backing = blk_new();
blk_insert_bs(backing, bs->backing->bs);
ret = blk_insert_bs(src, bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto ro_cleanup;
}
/* Insert commit_top block node above backing, so we can write to it */
backing_file_bs = backing_bs(bs);
commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
&local_err);
if (commit_top_bs == NULL) {
error_report_err(local_err);
goto ro_cleanup;
}
bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
ret = blk_insert_bs(backing, backing_file_bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto ro_cleanup;
}
length = blk_getlength(src);
if (length < 0) {
@@ -404,8 +562,12 @@ int bdrv_commit(BlockDriverState *bs)
ro_cleanup:
qemu_vfree(buf);
blk_unref(src);
blk_unref(backing);
if (backing_file_bs) {
bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
}
bdrv_unref(commit_top_bs);
blk_unref(src);
if (ro) {
/* ignoring error return here */

View File

@@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_probe = block_crypto_probe_luks,
.bdrv_open = block_crypto_open_luks,
.bdrv_close = block_crypto_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = block_crypto_create_luks,
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,

View File

@@ -135,6 +135,7 @@ typedef struct BDRVCURLState {
char *cookie;
bool accept_range;
AioContext *aio_context;
QemuMutex mutex;
char *username;
char *password;
char *proxyusername;
@@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
return FIND_RET_NONE;
}
/* Called with s->mutex held. */
static void curl_multi_check_completion(BDRVCURLState *s)
{
int msgs_in_queue;
@@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
continue;
}
qemu_mutex_unlock(&s->mutex);
acb->common.cb(acb->common.opaque, -EPROTO);
qemu_mutex_lock(&s->mutex);
qemu_aio_unref(acb);
state->acb[i] = NULL;
}
@@ -386,6 +390,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
}
}
/* Called with s->mutex held. */
static void curl_multi_do_locked(CURLState *s)
{
CURLSocket *socket, *next_socket;
@@ -409,19 +414,19 @@ static void curl_multi_do(void *arg)
{
CURLState *s = (CURLState *)arg;
aio_context_acquire(s->s->aio_context);
qemu_mutex_lock(&s->s->mutex);
curl_multi_do_locked(s);
aio_context_release(s->s->aio_context);
qemu_mutex_unlock(&s->s->mutex);
}
static void curl_multi_read(void *arg)
{
CURLState *s = (CURLState *)arg;
aio_context_acquire(s->s->aio_context);
qemu_mutex_lock(&s->s->mutex);
curl_multi_do_locked(s);
curl_multi_check_completion(s->s);
aio_context_release(s->s->aio_context);
qemu_mutex_unlock(&s->s->mutex);
}
static void curl_multi_timeout_do(void *arg)
@@ -434,11 +439,11 @@ static void curl_multi_timeout_do(void *arg)
return;
}
aio_context_acquire(s->aio_context);
qemu_mutex_lock(&s->mutex);
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_check_completion(s);
aio_context_release(s->aio_context);
qemu_mutex_unlock(&s->mutex);
#else
abort();
#endif
@@ -771,6 +776,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
curl_easy_cleanup(state->curl);
state->curl = NULL;
qemu_mutex_init(&s->mutex);
curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
qemu_opts_del(opts);
@@ -801,12 +807,11 @@ static void curl_readv_bh_cb(void *p)
CURLAIOCB *acb = p;
BlockDriverState *bs = acb->common.bs;
BDRVCURLState *s = bs->opaque;
AioContext *ctx = bdrv_get_aio_context(bs);
size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
size_t end;
aio_context_acquire(ctx);
qemu_mutex_lock(&s->mutex);
// In case we have the requested data already (e.g. read-ahead),
// we can just call the callback and be done.
@@ -854,7 +859,7 @@ static void curl_readv_bh_cb(void *p)
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
out:
aio_context_release(ctx);
qemu_mutex_unlock(&s->mutex);
if (ret != -EINPROGRESS) {
acb->common.cb(acb->common.opaque, ret);
qemu_aio_unref(acb);
@@ -883,6 +888,7 @@ static void curl_close(BlockDriverState *bs)
DPRINTF("CURL: Close\n");
curl_detach_aio_context(bs);
qemu_mutex_destroy(&s->mutex);
g_free(s->cookie);
g_free(s->url);

View File

@@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = {
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_refresh_limits = dmg_refresh_limits,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};

View File

@@ -668,6 +668,48 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
#endif
}
static int hdev_get_max_segments(const struct stat *st)
{
#ifdef CONFIG_LINUX
char buf[32];
const char *end;
char *sysfspath;
int ret;
int fd = -1;
long max_segments;
sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
major(st->st_rdev), minor(st->st_rdev));
fd = open(sysfspath, O_RDONLY);
if (fd == -1) {
ret = -errno;
goto out;
}
do {
ret = read(fd, buf, sizeof(buf));
} while (ret == -1 && errno == EINTR);
if (ret < 0) {
ret = -errno;
goto out;
} else if (ret == 0) {
ret = -EIO;
goto out;
}
buf[ret] = 0;
/* The file is ended with '\n', pass 'end' to accept that. */
ret = qemu_strtol(buf, &end, 10, &max_segments);
if (ret == 0 && end && *end == '\n') {
ret = max_segments;
}
out:
g_free(sysfspath);
return ret;
#else
return -ENOTSUP;
#endif
}
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
@@ -679,6 +721,11 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
bs->bl.max_transfer = pow2floor(ret);
}
ret = hdev_get_max_segments(&st);
if (ret > 0) {
bs->bl.max_transfer = MIN(bs->bl.max_transfer,
ret * getpagesize());
}
}
}

View File

@@ -12,6 +12,7 @@
#include "block/block_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/util.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
@@ -151,7 +152,7 @@ static QemuOptsList runtime_type_opts = {
{
.name = GLUSTER_OPT_TYPE,
.type = QEMU_OPT_STRING,
.help = "tcp|unix",
.help = "inet|unix",
},
{ /* end of list */ }
},
@@ -170,14 +171,14 @@ static QemuOptsList runtime_unix_opts = {
},
};
static QemuOptsList runtime_tcp_opts = {
.name = "gluster_tcp",
.head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
static QemuOptsList runtime_inet_opts = {
.name = "gluster_inet",
.head = QTAILQ_HEAD_INITIALIZER(runtime_inet_opts.head),
.desc = {
{
.name = GLUSTER_OPT_TYPE,
.type = QEMU_OPT_STRING,
.help = "tcp|unix",
.help = "inet|unix",
},
{
.name = GLUSTER_OPT_HOST,
@@ -320,7 +321,7 @@ static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
const char *filename)
{
GlusterServer *gsconf;
SocketAddressFlat *gsconf;
URI *uri;
QueryParams *qp = NULL;
bool is_unix = false;
@@ -331,19 +332,19 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
return -EINVAL;
}
gconf->server = g_new0(GlusterServerList, 1);
gconf->server->value = gsconf = g_new0(GlusterServer, 1);
gconf->server = g_new0(SocketAddressFlatList, 1);
gconf->server->value = gsconf = g_new0(SocketAddressFlat, 1);
/* transport */
if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
gsconf->type = GLUSTER_TRANSPORT_TCP;
gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
} else if (!strcmp(uri->scheme, "gluster+tcp")) {
gsconf->type = GLUSTER_TRANSPORT_TCP;
gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
} else if (!strcmp(uri->scheme, "gluster+unix")) {
gsconf->type = GLUSTER_TRANSPORT_UNIX;
gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_UNIX;
is_unix = true;
} else if (!strcmp(uri->scheme, "gluster+rdma")) {
gsconf->type = GLUSTER_TRANSPORT_TCP;
gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
error_report("Warning: rdma feature is not supported, falling "
"back to tcp");
} else {
@@ -373,11 +374,11 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
}
gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
} else {
gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
gsconf->u.inet.host = g_strdup(uri->server ? uri->server : "localhost");
if (uri->port) {
gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
gsconf->u.inet.port = g_strdup_printf("%d", uri->port);
} else {
gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
gsconf->u.inet.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
}
}
@@ -395,7 +396,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
struct glfs *glfs;
int ret;
int old_errno;
GlusterServerList *server;
SocketAddressFlatList *server;
unsigned long long port;
glfs = glfs_find_preopened(gconf->volume);
@@ -411,21 +412,19 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
glfs_set_preopened(gconf->volume, glfs);
for (server = gconf->server; server; server = server->next) {
if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
ret = glfs_set_volfile_server(glfs,
GlusterTransport_lookup[server->value->type],
if (server->value->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
ret = glfs_set_volfile_server(glfs, "unix",
server->value->u.q_unix.path, 0);
} else {
if (parse_uint_full(server->value->u.tcp.port, &port, 10) < 0 ||
if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
port > 65535) {
error_setg(errp, "'%s' is not a valid port number",
server->value->u.tcp.port);
server->value->u.inet.port);
errno = EINVAL;
goto out;
}
ret = glfs_set_volfile_server(glfs,
GlusterTransport_lookup[server->value->type],
server->value->u.tcp.host,
ret = glfs_set_volfile_server(glfs, "tcp",
server->value->u.inet.host,
(int)port);
}
@@ -444,13 +443,13 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
error_setg(errp, "Gluster connection for volume %s, path %s failed"
" to connect", gconf->volume, gconf->path);
for (server = gconf->server; server; server = server->next) {
if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
if (server->value->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
error_append_hint(errp, "hint: failed on socket %s ",
server->value->u.q_unix.path);
} else {
error_append_hint(errp, "hint: failed on host %s and port %s ",
server->value->u.tcp.host,
server->value->u.tcp.port);
server->value->u.inet.host,
server->value->u.inet.port);
}
}
@@ -474,23 +473,6 @@ out:
return NULL;
}
static int qapi_enum_parse(const char *opt)
{
int i;
if (!opt) {
return GLUSTER_TRANSPORT__MAX;
}
for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
if (!strcmp(opt, GlusterTransport_lookup[i])) {
return i;
}
}
return i;
}
/*
* Convert the json formatted command line into qapi.
*/
@@ -498,8 +480,8 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
QDict *options, Error **errp)
{
QemuOpts *opts;
GlusterServer *gsconf;
GlusterServerList *curr = NULL;
SocketAddressFlat *gsconf = NULL;
SocketAddressFlatList *curr = NULL;
QDict *backing_options = NULL;
Error *local_err = NULL;
char *str = NULL;
@@ -547,25 +529,31 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
gsconf = g_new0(GlusterServer, 1);
gsconf->type = qapi_enum_parse(ptr);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
GLUSTER_OPT_TYPE, "tcp or unix");
gsconf = g_new0(SocketAddressFlat, 1);
if (!strcmp(ptr, "tcp")) {
ptr = "inet"; /* accept legacy "tcp" */
}
gsconf->type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr,
SOCKET_ADDRESS_FLAT_TYPE__MAX, -1,
&local_err);
if (local_err) {
error_append_hint(&local_err,
"Parameter '%s' may be 'inet' or 'unix'\n",
GLUSTER_OPT_TYPE);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
qemu_opts_del(opts);
if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
/* create opts info from runtime_tcp_opts list */
opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
if (gsconf->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
/* create opts info from runtime_inet_opts list */
opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, backing_options, &local_err);
if (local_err) {
goto out;
@@ -578,7 +566,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.tcp.host = g_strdup(ptr);
gsconf->u.inet.host = g_strdup(ptr);
ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
@@ -586,28 +574,28 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.tcp.port = g_strdup(ptr);
gsconf->u.inet.port = g_strdup(ptr);
/* defend for unsupported fields in InetSocketAddress,
* i.e. @ipv4, @ipv6 and @to
*/
ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
if (ptr) {
gsconf->u.tcp.has_to = true;
gsconf->u.inet.has_to = true;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
if (ptr) {
gsconf->u.tcp.has_ipv4 = true;
gsconf->u.inet.has_ipv4 = true;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
if (ptr) {
gsconf->u.tcp.has_ipv6 = true;
gsconf->u.inet.has_ipv6 = true;
}
if (gsconf->u.tcp.has_to) {
if (gsconf->u.inet.has_to) {
error_setg(&local_err, "Parameter 'to' not supported");
goto out;
}
if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
if (gsconf->u.inet.has_ipv4 || gsconf->u.inet.has_ipv6) {
error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
goto out;
}
@@ -632,16 +620,18 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
}
if (gconf->server == NULL) {
gconf->server = g_new0(GlusterServerList, 1);
gconf->server = g_new0(SocketAddressFlatList, 1);
gconf->server->value = gsconf;
curr = gconf->server;
} else {
curr->next = g_new0(GlusterServerList, 1);
curr->next = g_new0(SocketAddressFlatList, 1);
curr->next->value = gsconf;
curr = curr->next;
}
gsconf = NULL;
qdict_del(backing_options, str);
QDECREF(backing_options);
backing_options = NULL;
g_free(str);
str = NULL;
}
@@ -650,11 +640,10 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
out:
error_propagate(errp, local_err);
qapi_free_SocketAddressFlat(gsconf);
qemu_opts_del(opts);
if (str) {
qdict_del(backing_options, str);
g_free(str);
}
g_free(str);
QDECREF(backing_options);
errno = EINVAL;
return -errno;
}
@@ -683,7 +672,7 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
"file.volume=testvol,file.path=/path/a.qcow2"
"[,file.debug=9]"
"[,file.logfile=/path/filename.log],"
"file.server.0.type=tcp,"
"file.server.0.type=inet,"
"file.server.0.host=1.2.3.4,"
"file.server.0.port=24007,"
"file.server.1.transport=unix,"

View File

@@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
BlockDriverState *bs = child->bs;
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
* modifying the image file. This is critical for zero-copy guest I/O
@@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
size_t skip_bytes;
int ret;
assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
@@ -1001,10 +1005,11 @@ err:
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
int ret = 0;
uint64_t bytes_remaining = bytes;
@@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (!ret || pnum != nb_sectors) {
ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
}
@@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
}
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
tracked_request_end(&req);
@@ -1306,10 +1311,11 @@ fail:
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
bool waited;
int ret;
@@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
assert(child->perm & BLK_PERM_WRITE);
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
@@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
return ret;
}
static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
int64_t offset,
unsigned int bytes,
BdrvRequestFlags flags,
BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
@@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
@@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
if (bytes >= align) {
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
@@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, req, offset, align,
ret = bdrv_aligned_preadv(child, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
@@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (!qiov) {
ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
goto out;
}
@@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
align, &tail_qiov, 0);
ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
align, align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
@@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
bytes = ROUND_UP(bytes, align);
}
ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
@@ -1751,7 +1760,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID);
ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
ret = bdrv_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
*pnum, pnum, file);
goto out;
}

View File

@@ -58,6 +58,7 @@ typedef struct IscsiLun {
int events;
QEMUTimer *nop_timer;
QEMUTimer *event_timer;
QemuMutex mutex;
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
@@ -252,6 +253,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
return ret;
}
/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -352,6 +354,7 @@ static const AIOCBInfo iscsi_aiocb_info = {
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
/* Called with QemuMutex held. */
static void
iscsi_set_events(IscsiLun *iscsilun)
{
@@ -395,10 +398,10 @@ iscsi_process_read(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
aio_context_acquire(iscsilun->aio_context);
qemu_mutex_lock(&iscsilun->mutex);
iscsi_service(iscsi, POLLIN);
iscsi_set_events(iscsilun);
aio_context_release(iscsilun->aio_context);
qemu_mutex_unlock(&iscsilun->mutex);
}
static void
@@ -407,10 +410,10 @@ iscsi_process_write(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
aio_context_acquire(iscsilun->aio_context);
qemu_mutex_lock(&iscsilun->mutex);
iscsi_service(iscsi, POLLOUT);
iscsi_set_events(iscsilun);
aio_context_release(iscsilun->aio_context);
qemu_mutex_unlock(&iscsilun->mutex);
}
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -589,6 +592,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
uint64_t lba;
uint32_t num_sectors;
bool fua = flags & BDRV_REQ_FUA;
int r = 0;
if (fua) {
assert(iscsilun->dpofua);
@@ -604,6 +608,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
@@ -632,6 +637,7 @@ retry:
}
#endif
if (iTask.task == NULL) {
qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
#if LIBISCSI_API_VERSION < (20160603)
@@ -640,7 +646,9 @@ retry:
#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -655,12 +663,15 @@ retry:
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
@@ -693,18 +704,21 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
goto out;
}
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
sector_qemu2lun(sector_num, iscsilun),
8 + 16, iscsi_co_generic_cb,
&iTask) == NULL) {
ret = -ENOMEM;
goto out;
goto out_unlock;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.do_retry) {
@@ -721,20 +735,20 @@ retry:
* because the device is busy or the cmd is not
* supported) we pretend all blocks are allocated
* for backwards compatibility */
goto out;
goto out_unlock;
}
lbas = scsi_datain_unmarshall(iTask.task);
if (lbas == NULL) {
ret = -EIO;
goto out;
goto out_unlock;
}
lbasd = &lbas->descriptors[0];
if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
ret = -EIO;
goto out;
goto out_unlock;
}
*pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
@@ -756,6 +770,8 @@ retry:
if (*pnum > nb_sectors) {
*pnum = nb_sectors;
}
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
out:
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
@@ -818,6 +834,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
@@ -848,6 +865,7 @@ retry:
}
#endif
if (iTask.task == NULL) {
qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
#if LIBISCSI_API_VERSION < (20160603)
@@ -855,7 +873,9 @@ retry:
#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -867,6 +887,7 @@ retry:
iTask.complete = 0;
goto retry;
}
qemu_mutex_unlock(&iscsilun->mutex);
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
@@ -881,15 +902,19 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
struct IscsiTask iTask;
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
0, iscsi_co_generic_cb, &iTask) == NULL) {
qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -901,6 +926,7 @@ retry:
iTask.complete = 0;
goto retry;
}
qemu_mutex_unlock(&iscsilun->mutex);
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
@@ -910,6 +936,7 @@ retry:
}
#ifdef __linux__
/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -1034,6 +1061,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
acb->task->expxferlen = acb->ioh->dxfer_len;
data.size = 0;
qemu_mutex_lock(&iscsilun->mutex);
if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
if (acb->ioh->iovec_count == 0) {
data.data = acb->ioh->dxferp;
@@ -1049,6 +1077,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
iscsi_aio_ioctl_cb,
(data.size > 0) ? &data : NULL,
acb) != 0) {
qemu_mutex_unlock(&iscsilun->mutex);
scsi_free_scsi_task(acb->task);
qemu_aio_unref(acb);
return NULL;
@@ -1068,6 +1097,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
}
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
return &acb->common;
}
@@ -1092,6 +1122,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
return -ENOTSUP;
@@ -1106,15 +1137,19 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
list.num = count / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
iscsi_co_generic_cb, &iTask) == NULL) {
return -ENOMEM;
r = -ENOMEM;
goto out_unlock;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -1131,17 +1166,20 @@ retry:
/* the target might fail with a check condition if it
is not happy with the alignment of the UNMAP request
we silently fail in this case */
return 0;
goto out_unlock;
}
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
static int
@@ -1153,6 +1191,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
uint64_t lba;
uint32_t nb_blocks;
bool use_16_for_ws = iscsilun->use_16_for_rw;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
return -ENOTSUP;
@@ -1186,6 +1225,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
}
}
qemu_mutex_lock(&iscsilun->mutex);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (use_16_for_ws) {
@@ -1200,12 +1240,15 @@ retry:
0, 0, iscsi_co_generic_cb, &iTask);
}
if (iTask.task == NULL) {
qemu_mutex_unlock(&iscsilun->mutex);
return -ENOMEM;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1215,7 +1258,8 @@ retry:
/* WRITE SAME is not supported by the target */
iscsilun->has_write_same = false;
scsi_free_scsi_task(iTask.task);
return -ENOTSUP;
r = -ENOTSUP;
goto out_unlock;
}
if (iTask.task != NULL) {
@@ -1231,7 +1275,8 @@ retry:
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1242,7 +1287,9 @@ retry:
count >> BDRV_SECTOR_BITS);
}
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
@@ -1331,7 +1378,7 @@ static void iscsi_nop_timed_event(void *opaque)
{
IscsiLun *iscsilun = opaque;
aio_context_acquire(iscsilun->aio_context);
qemu_mutex_lock(&iscsilun->mutex);
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
error_report("iSCSI: NOP timeout. Reconnecting...");
iscsilun->request_timed_out = true;
@@ -1344,7 +1391,7 @@ static void iscsi_nop_timed_event(void *opaque)
iscsi_set_events(iscsilun);
out:
aio_context_release(iscsilun->aio_context);
qemu_mutex_unlock(&iscsilun->mutex);
}
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1890,6 +1937,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
scsi_free_scsi_task(task);
task = NULL;
qemu_mutex_init(&iscsilun->mutex);
iscsi_attach_aio_context(bs, iscsilun->aio_context);
/* Guess the internal cluster (page) size of the iscsi target by the means
@@ -1935,6 +1983,7 @@ static void iscsi_close(BlockDriverState *bs)
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
iscsi_allocmap_free(iscsilun);
qemu_mutex_destroy(&iscsilun->mutex);
memset(iscsilun, 0, sizeof(IscsiLun));
}

View File

@@ -12,6 +12,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "trace.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
@@ -38,7 +39,10 @@ typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockBackend *target;
BlockDriverState *mirror_top_bs;
BlockDriverState *source;
BlockDriverState *base;
/* The name of the graph node to replace */
char *replaces;
/* The BDS to replace */
@@ -327,7 +331,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = blk_bs(s->common.blk);
BlockDriverState *source = s->source;
int64_t sector_num, first_chunk;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
@@ -386,7 +390,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * sectors_per_chunk);
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int ret;
int64_t ret;
int io_sectors, io_sectors_acct;
BlockDriverState *file;
enum MirrorMethod {
@@ -497,12 +501,37 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
BlockDriverState *src = blk_bs(s->common.blk);
BlockDriverState *src = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
bdrv_ref(src);
bdrv_ref(mirror_top_bs);
bdrv_ref(target_bs);
/* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
* inserting target_bs at s->to_replace, where we might not be able to get
* these permissions. */
blk_unref(s->target);
s->target = NULL;
/* We don't access the source any more. Dropping any WRITE/RESIZE is
* required before it could become a backing file of target_bs. */
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target_bs) != backing) {
bdrv_set_backing_hd(target_bs, backing, &local_err);
if (local_err) {
error_report_err(local_err);
data->ret = -EPERM;
}
}
}
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -522,12 +551,12 @@ static void mirror_exit(BlockJob *job, void *opaque)
/* The mirror job has no requests in flight any more, but we need to
* drain potential other users of the BDS before changing the graph. */
bdrv_drained_begin(target_bs);
bdrv_replace_in_backing_chain(to_replace, target_bs);
bdrv_replace_node(to_replace, target_bs, &local_err);
bdrv_drained_end(target_bs);
/* We just changed the BDS the job BB refers to */
blk_remove_bs(job->blk);
blk_insert_bs(job->blk, src);
if (local_err) {
error_report_err(local_err);
data->ret = -EPERM;
}
}
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -538,11 +567,28 @@ static void mirror_exit(BlockJob *job, void *opaque)
aio_context_release(replace_aio_context);
}
g_free(s->replaces);
blk_unref(s->target);
s->target = NULL;
bdrv_unref(target_bs);
/* Remove the mirror filter driver from the graph. Before this, get rid of
* the blockers on the intermediate nodes so that the resulting state is
* valid. Also give up permissions on mirror_top_bs->backing, which might
* block the removal. */
block_job_remove_all_bdrv(job);
bdrv_child_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL);
bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
/* We just changed the BDS the job BB refers to (with either or both of the
* bdrv_replace_node() calls), so switch the BB back so the cleanup does
* the right thing. We don't need any permissions any more now. */
blk_remove_bs(job->blk);
blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
block_job_completed(&s->common, data->ret);
g_free(data);
bdrv_drained_end(src);
bdrv_unref(mirror_top_bs);
bdrv_unref(src);
}
@@ -562,7 +608,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t sector_num, end;
BlockDriverState *base = s->base;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
@@ -644,7 +690,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
int64_t length;
@@ -876,9 +922,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
BlockDriverState *src, *target;
BlockDriverState *target;
src = blk_bs(job->blk);
target = blk_bs(s->target);
if (!s->synced) {
@@ -910,6 +955,10 @@ static void mirror_complete(BlockJob *job, Error **errp)
replace_aio_context = bdrv_get_aio_context(s->to_replace);
aio_context_acquire(replace_aio_context);
/* TODO Translate this into permission system. Current definition of
* GRAPH_MOD would require to request it for the parents; they might
* not even be BlockDriverStates, however, so a BdrvChild can't address
* them. May need redefinition of GRAPH_MOD. */
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -918,13 +967,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
aio_context_release(replace_aio_context);
}
if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target) != backing) {
bdrv_set_backing_hd(target, backing);
}
}
s->should_complete = true;
block_job_enter(&s->common);
}
@@ -980,6 +1022,85 @@ static const BlockJobDriver commit_active_job_driver = {
.drain = mirror_drain,
};
static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
}
static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->backing->bs);
}
static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file)
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
}
static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
return bdrv_co_pdiscard(bs->backing->bs, offset, count);
}
static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
{
bdrv_refresh_filename(bs->backing->bs);
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
bs->backing->bs->filename);
}
static void bdrv_mirror_top_close(BlockDriverState *bs)
{
}
static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
/* Must be able to forward guest writes to the real image */
*nperm = 0;
if (perm & BLK_PERM_WRITE) {
*nperm |= BLK_PERM_WRITE;
}
*nshared = BLK_PERM_ALL;
}
/* Dummy node that provides consistent read to its users without requiring it
* from its backing file and that allows writes on the backing file chain. */
static BlockDriver bdrv_mirror_top = {
.format_name = "mirror_top",
.bdrv_co_preadv = bdrv_mirror_top_preadv,
.bdrv_co_pwritev = bdrv_mirror_top_pwritev,
.bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
.bdrv_co_flush = bdrv_mirror_top_flush,
.bdrv_co_get_block_status = bdrv_mirror_top_get_block_status,
.bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
.bdrv_close = bdrv_mirror_top_close,
.bdrv_child_perm = bdrv_mirror_top_child_perm,
};
static void mirror_start_job(const char *job_id, BlockDriverState *bs,
int creation_flags, BlockDriverState *target,
const char *replaces, int64_t speed,
@@ -992,9 +1113,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
void *opaque, Error **errp,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base,
bool auto_complete)
bool auto_complete, const char *filter_node_name)
{
MirrorBlockJob *s;
BlockDriverState *mirror_top_bs;
bool target_graph_mod;
bool target_is_backing;
Error *local_err = NULL;
int ret;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -1011,14 +1137,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
s = block_job_create(job_id, driver, bs, speed, creation_flags,
cb, opaque, errp);
if (!s) {
/* In the case of active commit, add dummy driver to provide consistent
* reads on the top, while disabling it in the intermediate nodes, and make
* the backing chain writable. */
mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
BDRV_O_RDWR, errp);
if (mirror_top_bs == NULL) {
return;
}
mirror_top_bs->total_sectors = bs->total_sectors;
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
* it alive until block_job_create() even if bs has no parent. */
bdrv_ref(mirror_top_bs);
bdrv_drained_begin(bs);
bdrv_append(mirror_top_bs, bs, &local_err);
bdrv_drained_end(bs);
if (local_err) {
bdrv_unref(mirror_top_bs);
error_propagate(errp, local_err);
return;
}
s->target = blk_new();
blk_insert_bs(s->target, target);
/* Make sure that the source is not resized while the job is running */
s = block_job_create(job_id, driver, mirror_top_bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
creation_flags, cb, opaque, errp);
bdrv_unref(mirror_top_bs);
if (!s) {
goto fail;
}
s->source = bs;
s->mirror_top_bs = mirror_top_bs;
/* No resize for the target either; while the mirror is still running, a
* consistent read isn't necessarily possible. We could possibly allow
* writes and graph modifications, though it would likely defeat the
* purpose of a mirror, so leave them blocked for now.
*
* In the case of active commit, things look a bit different, though,
* because the target is an already populated backing file in active use.
* We can allow anything except resize there.*/
target_is_backing = bdrv_chain_contains(bs, target);
target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
(target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
BLK_PERM_WRITE_UNCHANGED |
(target_is_backing ? BLK_PERM_CONSISTENT_READ |
BLK_PERM_WRITE |
BLK_PERM_GRAPH_MOD : 0));
ret = blk_insert_bs(s->target, target, errp);
if (ret < 0) {
goto fail;
}
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
@@ -1035,24 +1209,44 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!s->dirty_bitmap) {
g_free(s->replaces);
blk_unref(s->target);
block_job_unref(&s->common);
return;
goto fail;
}
block_job_add_bdrv(&s->common, target);
/* Required permissions are already taken with blk_new() */
block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
/* In commit_active_start() all intermediate nodes disappear, so
* any jobs in them must be blocked */
if (bdrv_chain_contains(bs, target)) {
if (target_is_backing) {
BlockDriverState *iter;
for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block
* ourselves at s->base (if writes are blocked for a node, they are
* also blocked for its backing file). The other options would be a
* second filter driver above s->base (== target). */
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
errp);
if (ret < 0) {
goto fail;
}
}
}
trace_mirror_start(bs, s, opaque);
block_job_start(&s->common);
return;
fail:
if (s) {
g_free(s->replaces);
blk_unref(s->target);
block_job_unref(&s->common);
}
bdrv_child_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL);
bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
}
void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1061,7 +1255,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap, Error **errp)
bool unmap, const char *filter_node_name, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
@@ -1075,12 +1269,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL, errp,
&mirror_job_driver, is_none_mode, base, false);
&mirror_job_driver, is_none_mode, base, false,
filter_node_name);
}
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
const char *filter_node_name,
BlockCompletionFunc *cb, void *opaque, Error **errp,
bool auto_complete)
{
@@ -1096,7 +1292,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque, &local_err,
&commit_active_job_driver, false, base, auto_complete);
&commit_active_job_driver, false, base, auto_complete,
filter_node_name);
if (local_err) {
error_propagate(errp, local_err);
goto error_restore_flags;

View File

@@ -278,7 +278,7 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
goto done;
}
iv = qobject_input_visitor_new(crumpled_addr, true);
iv = qobject_input_visitor_new(crumpled_addr);
visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
if (local_err) {
error_propagate(errp, local_err);

View File

@@ -54,6 +54,7 @@ typedef struct NFSClient {
int events;
bool has_zero_init;
AioContext *aio_context;
QemuMutex mutex;
blkcnt_t st_blocks;
bool cache_used;
NFSServer *server;
@@ -191,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict *options,
static void nfs_process_read(void *arg);
static void nfs_process_write(void *arg);
/* Called with QemuMutex held. */
static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
@@ -209,20 +211,20 @@ static void nfs_process_read(void *arg)
{
NFSClient *client = arg;
aio_context_acquire(client->aio_context);
qemu_mutex_lock(&client->mutex);
nfs_service(client->context, POLLIN);
nfs_set_events(client);
aio_context_release(client->aio_context);
qemu_mutex_unlock(&client->mutex);
}
static void nfs_process_write(void *arg)
{
NFSClient *client = arg;
aio_context_acquire(client->aio_context);
qemu_mutex_lock(&client->mutex);
nfs_service(client->context, POLLOUT);
nfs_set_events(client);
aio_context_release(client->aio_context);
qemu_mutex_unlock(&client->mutex);
}
static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -242,6 +244,7 @@ static void nfs_co_generic_bh_cb(void *opaque)
aio_co_wake(task->co);
}
/* Called (via nfs_service) with QemuMutex held. */
static void
nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
@@ -273,12 +276,15 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
nfs_co_init_task(bs, &task);
task.iov = iov;
qemu_mutex_lock(&client->mutex);
if (nfs_pread_async(client->context, client->fh,
offset, bytes, nfs_co_generic_cb, &task) != 0) {
qemu_mutex_unlock(&client->mutex);
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
@@ -317,9 +323,11 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
buf = iov->iov[0].iov_base;
}
qemu_mutex_lock(&client->mutex);
if (nfs_pwrite_async(client->context, client->fh,
offset, bytes, buf,
nfs_co_generic_cb, &task) != 0) {
qemu_mutex_unlock(&client->mutex);
if (my_buffer) {
g_free(buf);
}
@@ -327,6 +335,7 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
@@ -349,12 +358,15 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
nfs_co_init_task(bs, &task);
qemu_mutex_lock(&client->mutex);
if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
qemu_mutex_unlock(&client->mutex);
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
@@ -440,6 +452,7 @@ static void nfs_file_close(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
nfs_client_close(client);
qemu_mutex_destroy(&client->mutex);
}
static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -461,7 +474,7 @@ static NFSServer *nfs_config(QDict *options, Error **errp)
goto out;
}
iv = qobject_input_visitor_new(crumpled_addr, true);
iv = qobject_input_visitor_new(crumpled_addr);
visit_type_NFSServer(iv, NULL, &server, &local_error);
if (local_error) {
error_propagate(errp, local_error);
@@ -647,6 +660,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
if (ret < 0) {
return ret;
}
qemu_mutex_init(&client->mutex);
bs->total_sectors = ret;
ret = 0;
return ret;
@@ -702,6 +716,7 @@ static int nfs_has_zero_init(BlockDriverState *bs)
return client->has_zero_init;
}
/* Called (via nfs_service) with QemuMutex held. */
static void
nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)

View File

@@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
}
file = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (file == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = {
.bdrv_probe = parallels_probe,
.bdrv_open = parallels_open,
.bdrv_close = parallels_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_get_block_status = parallels_co_get_block_status,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_flush_to_os = parallels_co_flush_to_os,

View File

@@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
qcow_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (qcow_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_probe = qcow_probe,
.bdrv_open = qcow_open,
.bdrv_close = qcow_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,

View File

@@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
blk = blk_new_open(filename, NULL, options,
BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
uint64_t cluster_size = s->cluster_size;
bool encrypt;
int refcount_bits = s->refcount_bits;
Error *local_err = NULL;
int ret;
QemuOptDesc *desc = opts->list->desc;
Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (new_size) {
BlockBackend *blk = blk_new();
blk_insert_bs(blk, bs);
BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, &local_err);
if (ret < 0) {
error_report_err(local_err);
blk_unref(blk);
return ret;
}
ret = blk_truncate(blk, new_size);
blk_unref(blk);
if (ret < 0) {
return ret;
}
@@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_reopen_commit = qcow2_reopen_commit,
.bdrv_reopen_abort = qcow2_reopen_abort,
.bdrv_join_options = qcow2_join_options,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,

View File

@@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = {
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,

View File

@@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
/* We can safely add the child now */
bdrv_ref(child_bs);
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
if (child == NULL) {
s->next_child_index--;
bdrv_unref(child_bs);
goto out;
}
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
s->children[s->num_children++] = child;
out:
bdrv_drained_end(bs);
}
@@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
.bdrv_child_perm = bdrv_filter_default_perms,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};

View File

@@ -467,6 +467,7 @@ BlockDriver bdrv_raw = {
.bdrv_reopen_abort = &raw_reopen_abort,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_create = &raw_create,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,

View File

@@ -18,6 +18,7 @@
#include "block/block_int.h"
#include "crypto/secret.h"
#include "qemu/cutils.h"
#include "qapi/qmp/qstring.h"
#include <rbd/librbd.h>
@@ -102,10 +103,10 @@ typedef struct BDRVRBDState {
char *snap;
} BDRVRBDState;
static int qemu_rbd_next_tok(char *dst, int dst_len,
char *src, char delim,
const char *name,
char **p, Error **errp)
static char *qemu_rbd_next_tok(int max_len,
char *src, char delim,
const char *name,
char **p, Error **errp)
{
int l;
char *end;
@@ -127,17 +128,15 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
}
}
l = strlen(src);
if (l >= dst_len) {
if (l >= max_len) {
error_setg(errp, "%s too long", name);
return -EINVAL;
return NULL;
} else if (l == 0) {
error_setg(errp, "%s too short", name);
return -EINVAL;
return NULL;
}
pstrcpy(dst, dst_len, src);
return 0;
return src;
}
static void qemu_rbd_unescape(char *src)
@@ -153,87 +152,134 @@ static void qemu_rbd_unescape(char *src)
*p = '\0';
}
static int qemu_rbd_parsename(const char *filename,
char *pool, int pool_len,
char *snap, int snap_len,
char *name, int name_len,
char *conf, int conf_len,
Error **errp)
static void qemu_rbd_parse_filename(const char *filename, QDict *options,
Error **errp)
{
const char *start;
char *p, *buf;
int ret;
char *p, *buf, *keypairs;
char *found_str;
size_t max_keypair_size;
Error *local_err = NULL;
if (!strstart(filename, "rbd:", &start)) {
error_setg(errp, "File name must start with 'rbd:'");
return -EINVAL;
return;
}
max_keypair_size = strlen(start) + 1;
buf = g_strdup(start);
keypairs = g_malloc0(max_keypair_size);
p = buf;
*snap = '\0';
*conf = '\0';
ret = qemu_rbd_next_tok(pool, pool_len, p,
'/', "pool name", &p, errp);
if (ret < 0 || !p) {
ret = -EINVAL;
found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p,
'/', "pool name", &p, &local_err);
if (local_err) {
goto done;
}
qemu_rbd_unescape(pool);
if (!p) {
error_setg(errp, "Pool name is required");
goto done;
}
qemu_rbd_unescape(found_str);
qdict_put(options, "pool", qstring_from_str(found_str));
if (strchr(p, '@')) {
ret = qemu_rbd_next_tok(name, name_len, p,
'@', "object name", &p, errp);
if (ret < 0) {
found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
'@', "object name", &p, &local_err);
if (local_err) {
goto done;
}
ret = qemu_rbd_next_tok(snap, snap_len, p,
':', "snap name", &p, errp);
qemu_rbd_unescape(snap);
qemu_rbd_unescape(found_str);
qdict_put(options, "image", qstring_from_str(found_str));
found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p,
':', "snap name", &p, &local_err);
if (local_err) {
goto done;
}
qemu_rbd_unescape(found_str);
qdict_put(options, "snapshot", qstring_from_str(found_str));
} else {
ret = qemu_rbd_next_tok(name, name_len, p,
':', "object name", &p, errp);
found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
':', "object name", &p, &local_err);
if (local_err) {
goto done;
}
qemu_rbd_unescape(found_str);
qdict_put(options, "image", qstring_from_str(found_str));
}
qemu_rbd_unescape(name);
if (ret < 0 || !p) {
if (!p) {
goto done;
}
ret = qemu_rbd_next_tok(conf, conf_len, p,
'\0', "configuration", &p, errp);
found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
'\0', "configuration", &p, &local_err);
if (local_err) {
goto done;
}
done:
g_free(buf);
return ret;
}
p = found_str;
static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
{
const char *p = conf;
while (*p) {
int len;
const char *end = strchr(p, ':');
if (end) {
len = end - p;
} else {
len = strlen(p);
}
if (strncmp(p, "id=", 3) == 0) {
len -= 3;
strncpy(clientname, p + 3, len);
clientname[len] = '\0';
return clientname;
}
if (end == NULL) {
/* The following are essentially all key/value pairs, and we treat
* 'id' and 'conf' a bit special. Key/value pairs may be in any order. */
while (p) {
char *name, *value;
name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
'=', "conf option name", &p, &local_err);
if (local_err) {
break;
}
p = end + 1;
if (!p) {
error_setg(errp, "conf option %s has no value", name);
break;
}
qemu_rbd_unescape(name);
value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
':', "conf option value", &p, &local_err);
if (local_err) {
break;
}
qemu_rbd_unescape(value);
if (!strcmp(name, "conf")) {
qdict_put(options, "conf", qstring_from_str(value));
} else if (!strcmp(name, "id")) {
qdict_put(options, "user" , qstring_from_str(value));
} else {
/* FIXME: This is pretty ugly, and not the right way to do this.
* These should be contained in a structure, and then
* passed explicitly as individual key/value pairs to
* rados. Consider this legacy code that needs to be
* updated. */
char *tmp = g_malloc0(max_keypair_size);
/* only use a delimiter if it is not the first keypair found */
/* These are sets of unknown key/value pairs we'll pass along
* to ceph */
if (keypairs[0]) {
snprintf(tmp, max_keypair_size, ":%s=%s", name, value);
pstrcat(keypairs, max_keypair_size, tmp);
} else {
snprintf(keypairs, max_keypair_size, "%s=%s", name, value);
}
g_free(tmp);
}
}
return NULL;
if (keypairs[0]) {
qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs));
}
done:
if (local_err) {
error_propagate(errp, local_err);
}
g_free(buf);
g_free(keypairs);
return;
}
@@ -256,26 +302,24 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
return 0;
}
static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
bool only_read_conf_file,
Error **errp)
static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs,
Error **errp)
{
char *p, *buf;
char name[RBD_MAX_CONF_NAME_SIZE];
char value[RBD_MAX_CONF_VAL_SIZE];
char *name;
char *value;
Error *local_err = NULL;
int ret = 0;
buf = g_strdup(conf);
buf = g_strdup(keypairs);
p = buf;
while (p) {
ret = qemu_rbd_next_tok(name, sizeof(name), p,
'=', "conf option name", &p, errp);
if (ret < 0) {
name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
'=', "conf option name", &p, &local_err);
if (local_err) {
break;
}
qemu_rbd_unescape(name);
if (!p) {
error_setg(errp, "conf option %s has no value", name);
@@ -283,36 +327,24 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
break;
}
ret = qemu_rbd_next_tok(value, sizeof(value), p,
':', "conf option value", &p, errp);
if (ret < 0) {
value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
':', "conf option value", &p, &local_err);
if (local_err) {
break;
}
qemu_rbd_unescape(value);
if (strcmp(name, "conf") == 0) {
/* read the conf file alone, so it doesn't override more
specific settings for a particular device */
if (only_read_conf_file) {
ret = rados_conf_read_file(cluster, value);
if (ret < 0) {
error_setg_errno(errp, -ret, "error reading conf file %s",
value);
break;
}
}
} else if (strcmp(name, "id") == 0) {
/* ignore, this is parsed by qemu_rbd_parse_clientname() */
} else if (!only_read_conf_file) {
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
error_setg_errno(errp, -ret, "invalid conf option %s", name);
ret = -EINVAL;
break;
}
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
error_setg_errno(errp, -ret, "invalid conf option %s", name);
ret = -EINVAL;
break;
}
}
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
}
g_free(buf);
return ret;
}
@@ -328,33 +360,84 @@ static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
}
}
static QemuOptsList runtime_opts = {
.name = "rbd",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
.help = "ID of secret providing the password",
},
{
.name = "conf",
.type = QEMU_OPT_STRING,
.help = "Rados config file location",
},
{
.name = "pool",
.type = QEMU_OPT_STRING,
.help = "Rados pool name",
},
{
.name = "image",
.type = QEMU_OPT_STRING,
.help = "Image name in the pool",
},
{
.name = "snapshot",
.type = QEMU_OPT_STRING,
.help = "Ceph snapshot name",
},
{
/* maps to 'id' in rados_create() */
.name = "user",
.type = QEMU_OPT_STRING,
.help = "Rados id name",
},
{
.name = "keyvalue-pairs",
.type = QEMU_OPT_STRING,
.help = "Legacy rados key/value option parameters",
},
{
.name = "host",
.type = QEMU_OPT_STRING,
},
{
.name = "port",
.type = QEMU_OPT_STRING,
},
{
.name = "auth",
.type = QEMU_OPT_STRING,
.help = "Supported authentication method, either cephx or none",
},
{ /* end of list */ }
},
};
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
int64_t bytes = 0;
int64_t objsize;
int obj_order = 0;
char pool[RBD_MAX_POOL_NAME_SIZE];
char name[RBD_MAX_IMAGE_NAME_SIZE];
char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
const char *pool, *name, *conf, *clientname, *keypairs;
const char *secretid;
rados_t cluster;
rados_ioctx_t io_ctx;
int ret;
QDict *options = NULL;
QemuOpts *rbd_opts = NULL;
int ret = 0;
secretid = qemu_opt_get(opts, "password-secret");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
name, sizeof(name),
conf, sizeof(conf), &local_err) < 0) {
error_propagate(errp, local_err);
return -EINVAL;
}
/* Read out options */
bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
@@ -362,35 +445,55 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
if (objsize) {
if ((objsize - 1) & objsize) { /* not a power of 2? */
error_setg(errp, "obj size needs to be power of 2");
return -EINVAL;
ret = -EINVAL;
goto exit;
}
if (objsize < 4096) {
error_setg(errp, "obj size too small");
return -EINVAL;
ret = -EINVAL;
goto exit;
}
obj_order = ctz32(objsize);
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
options = qdict_new();
qemu_rbd_parse_filename(filename, options, &local_err);
if (local_err) {
ret = -EINVAL;
error_propagate(errp, local_err);
goto exit;
}
rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(rbd_opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto exit;
}
pool = qemu_opt_get(rbd_opts, "pool");
conf = qemu_opt_get(rbd_opts, "conf");
clientname = qemu_opt_get(rbd_opts, "user");
name = qemu_opt_get(rbd_opts, "image");
keypairs = qemu_opt_get(rbd_opts, "keyvalue-pairs");
ret = rados_create(&cluster, clientname);
if (ret < 0) {
error_setg_errno(errp, -ret, "error initializing");
return ret;
goto exit;
}
if (strstr(conf, "conf=") == NULL) {
/* try default location, but ignore failure */
rados_conf_read_file(cluster, NULL);
} else if (conf[0] != '\0' &&
qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
error_propagate(errp, local_err);
/* try default location when conf=NULL, but ignore failure */
ret = rados_conf_read_file(cluster, conf);
if (conf && ret < 0) {
error_setg_errno(errp, -ret, "error reading conf file %s", conf);
ret = -EIO;
goto shutdown;
}
if (conf[0] != '\0' &&
qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
error_propagate(errp, local_err);
ret = qemu_rbd_set_keypairs(cluster, keypairs, errp);
if (ret < 0) {
ret = -EIO;
goto shutdown;
}
@@ -421,6 +524,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
shutdown:
rados_shutdown(cluster);
exit:
QDECREF(options);
qemu_opts_del(rbd_opts);
return ret;
}
@@ -471,38 +578,104 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
qemu_aio_unref(acb);
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "rbd",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
.help = "ID of secret providing the password",
},
{ /* end of list */ }
},
};
#define RBD_MON_HOST 0
#define RBD_AUTH_SUPPORTED 1
static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type,
Error **errp)
{
int num_entries;
QemuOpts *opts = NULL;
QDict *sub_options;
const char *host;
const char *port;
char *str;
char *rados_str = NULL;
Error *local_err = NULL;
int i;
assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED);
num_entries = qdict_array_entries(options, prefix);
if (num_entries < 0) {
error_setg(errp, "Parse error on RBD QDict array");
return NULL;
}
for (i = 0; i < num_entries; i++) {
char *strbuf = NULL;
const char *value;
char *rados_str_tmp;
str = g_strdup_printf("%s%d.", prefix, i);
qdict_extract_subqdict(options, &sub_options, str);
g_free(str);
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, sub_options, &local_err);
QDECREF(sub_options);
if (local_err) {
error_propagate(errp, local_err);
g_free(rados_str);
rados_str = NULL;
goto exit;
}
if (type == RBD_MON_HOST) {
host = qemu_opt_get(opts, "host");
port = qemu_opt_get(opts, "port");
value = host;
if (port) {
/* check for ipv6 */
if (strchr(host, ':')) {
strbuf = g_strdup_printf("[%s]:%s", host, port);
} else {
strbuf = g_strdup_printf("%s:%s", host, port);
}
value = strbuf;
} else if (strchr(host, ':')) {
strbuf = g_strdup_printf("[%s]", host);
value = strbuf;
}
} else {
value = qemu_opt_get(opts, "auth");
}
/* each iteration in the for loop will build upon the string, and if
* rados_str is NULL then it is our first pass */
if (rados_str) {
/* separate options with ';', as that is what rados_conf_set()
* requires */
rados_str_tmp = rados_str;
rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value);
g_free(rados_str_tmp);
} else {
rados_str = g_strdup(value);
}
g_free(strbuf);
qemu_opts_del(opts);
opts = NULL;
}
exit:
qemu_opts_del(opts);
return rados_str;
}
static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVRBDState *s = bs->opaque;
char pool[RBD_MAX_POOL_NAME_SIZE];
char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
const char *pool, *snap, *conf, *clientname, *name, *keypairs;
const char *secretid;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
char *mon_host = NULL;
char *auth_supported = NULL;
int r;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
@@ -513,41 +686,63 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
filename = qemu_opt_get(opts, "filename");
secretid = qemu_opt_get(opts, "password-secret");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
s->name, sizeof(s->name),
conf, sizeof(conf), errp) < 0) {
auth_supported = qemu_rbd_array_opts(options, "auth-supported.",
RBD_AUTH_SUPPORTED, &local_err);
if (local_err) {
error_propagate(errp, local_err);
r = -EINVAL;
goto failed_opts;
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
mon_host = qemu_rbd_array_opts(options, "server.",
RBD_MON_HOST, &local_err);
if (local_err) {
error_propagate(errp, local_err);
r = -EINVAL;
goto failed_opts;
}
secretid = qemu_opt_get(opts, "password-secret");
pool = qemu_opt_get(opts, "pool");
conf = qemu_opt_get(opts, "conf");
snap = qemu_opt_get(opts, "snapshot");
clientname = qemu_opt_get(opts, "user");
name = qemu_opt_get(opts, "image");
keypairs = qemu_opt_get(opts, "keyvalue-pairs");
r = rados_create(&s->cluster, clientname);
if (r < 0) {
error_setg_errno(errp, -r, "error initializing");
goto failed_opts;
}
s->snap = NULL;
if (snap_buf[0] != '\0') {
s->snap = g_strdup(snap_buf);
s->snap = g_strdup(snap);
if (name) {
pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name);
}
if (strstr(conf, "conf=") == NULL) {
/* try default location, but ignore failure */
rados_conf_read_file(s->cluster, NULL);
} else if (conf[0] != '\0') {
r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
/* try default location when conf=NULL, but ignore failure */
r = rados_conf_read_file(s->cluster, conf);
if (conf && r < 0) {
error_setg_errno(errp, -r, "error reading conf file %s", conf);
goto failed_shutdown;
}
r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp);
if (r < 0) {
goto failed_shutdown;
}
if (mon_host) {
r = rados_conf_set(s->cluster, "mon_host", mon_host);
if (r < 0) {
goto failed_shutdown;
}
}
if (conf[0] != '\0') {
r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
if (auth_supported) {
r = rados_conf_set(s->cluster, "auth_supported", auth_supported);
if (r < 0) {
goto failed_shutdown;
}
@@ -601,6 +796,8 @@ failed_shutdown:
g_free(s->snap);
failed_opts:
qemu_opts_del(opts);
g_free(mon_host);
g_free(auth_supported);
return r;
}
@@ -1004,18 +1201,18 @@ static QemuOptsList qemu_rbd_create_opts = {
};
static BlockDriver bdrv_rbd = {
.format_name = "rbd",
.instance_size = sizeof(BDRVRBDState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_create = qemu_rbd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_get_info = qemu_rbd_getinfo,
.create_opts = &qemu_rbd_create_opts,
.bdrv_getlength = qemu_rbd_getlength,
.bdrv_truncate = qemu_rbd_truncate,
.protocol_name = "rbd",
.format_name = "rbd",
.instance_size = sizeof(BDRVRBDState),
.bdrv_parse_filename = qemu_rbd_parse_filename,
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_create = qemu_rbd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_get_info = qemu_rbd_getinfo,
.create_opts = &qemu_rbd_create_opts,
.bdrv_getlength = qemu_rbd_getlength,
.bdrv_truncate = qemu_rbd_truncate,
.protocol_name = "rbd",
.bdrv_aio_readv = qemu_rbd_aio_readv,
.bdrv_aio_writev = qemu_rbd_aio_writev,

View File

@@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
s->replication_state = BLOCK_REPLICATION_FAILOVER;
commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
replication_done, bs, errp, true);
NULL, replication_done, bs, errp, true);
break;
default:
aio_context_release(aio_context);
@@ -660,6 +660,7 @@ BlockDriver bdrv_replication = {
.bdrv_open = replication_open,
.bdrv_close = replication_close,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = replication_getlength,
.bdrv_co_readv = replication_co_readv,

View File

@@ -14,6 +14,8 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qint.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
@@ -374,8 +376,7 @@ struct BDRVSheepdogState {
uint32_t cache_flags;
bool discard_supported;
char *host_spec;
bool is_unix;
SocketAddress *addr;
int fd;
CoMutex lock;
@@ -527,21 +528,36 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
}
static SocketAddress *sd_socket_address(const char *path,
const char *host, const char *port)
{
SocketAddress *addr = g_new0(SocketAddress, 1);
if (path) {
addr->type = SOCKET_ADDRESS_KIND_UNIX;
addr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
addr->u.q_unix.data->path = g_strdup(path);
} else {
addr->type = SOCKET_ADDRESS_KIND_INET;
addr->u.inet.data = g_new0(InetSocketAddress, 1);
addr->u.inet.data->host = g_strdup(host ?: SD_DEFAULT_ADDR);
addr->u.inet.data->port = g_strdup(port ?: stringify(SD_DEFAULT_PORT));
}
return addr;
}
/* Return -EIO in case of error, file descriptor on success */
static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
{
int fd;
if (s->is_unix) {
fd = unix_connect(s->host_spec, errp);
} else {
fd = inet_connect(s->host_spec, errp);
fd = socket_connect(s->addr, errp, NULL, NULL);
if (fd >= 0) {
int ret = socket_set_nodelay(fd);
if (ret < 0) {
error_report("%s", strerror(errno));
}
if (s->addr->type == SOCKET_ADDRESS_KIND_INET && fd >= 0) {
int ret = socket_set_nodelay(fd);
if (ret < 0) {
error_report("%s", strerror(errno));
}
}
@@ -820,8 +836,7 @@ static void coroutine_fn aio_read_response(void *opaque)
case AIOCB_DISCARD_OBJ:
switch (rsp.result) {
case SD_RES_INVALID_PARMS:
error_report("sheep(%s) doesn't support discard command",
s->host_spec);
error_report("server doesn't support discard command");
rsp.result = SD_RES_SUCCESS;
s->discard_supported = false;
break;
@@ -914,71 +929,155 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
return fd;
}
static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
char *vdi, uint32_t *snapid, char *tag)
/*
* Parse numeric snapshot ID in @str
* If @str can't be parsed as number, return false.
* Else, if the number is zero or too large, set *@snapid to zero and
* return true.
* Else, set *@snapid to the number and return true.
*/
static bool sd_parse_snapid(const char *str, uint32_t *snapid)
{
URI *uri;
QueryParams *qp = NULL;
int ret = 0;
unsigned long ul;
int ret;
uri = uri_parse(filename);
ret = qemu_strtoul(str, NULL, 10, &ul);
if (ret == -ERANGE) {
ul = ret = 0;
}
if (ret) {
return false;
}
if (ul > UINT32_MAX) {
ul = 0;
}
*snapid = ul;
return true;
}
static bool sd_parse_snapid_or_tag(const char *str,
uint32_t *snapid, char tag[])
{
if (!sd_parse_snapid(str, snapid)) {
*snapid = 0;
if (g_strlcpy(tag, str, SD_MAX_VDI_TAG_LEN) >= SD_MAX_VDI_TAG_LEN) {
return false;
}
} else if (!*snapid) {
return false;
} else {
tag[0] = 0;
}
return true;
}
typedef struct {
const char *path; /* non-null iff transport is tcp */
const char *host; /* valid when transport is tcp */
int port; /* valid when transport is tcp */
char vdi[SD_MAX_VDI_LEN];
char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snap_id;
/* Remainder is only for sd_config_done() */
URI *uri;
QueryParams *qp;
} SheepdogConfig;
static void sd_config_done(SheepdogConfig *cfg)
{
if (cfg->qp) {
query_params_free(cfg->qp);
}
uri_free(cfg->uri);
}
static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
Error **errp)
{
Error *err = NULL;
QueryParams *qp = NULL;
bool is_unix;
URI *uri;
memset(cfg, 0, sizeof(*cfg));
cfg->uri = uri = uri_parse(filename);
if (!uri) {
return -EINVAL;
error_setg(&err, "invalid URI");
goto out;
}
/* transport */
if (!strcmp(uri->scheme, "sheepdog")) {
s->is_unix = false;
is_unix = false;
} else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
s->is_unix = false;
is_unix = false;
} else if (!strcmp(uri->scheme, "sheepdog+unix")) {
s->is_unix = true;
is_unix = true;
} else {
ret = -EINVAL;
error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp',"
" or 'sheepdog+unix'");
goto out;
}
if (uri->path == NULL || !strcmp(uri->path, "/")) {
ret = -EINVAL;
error_setg(&err, "missing file path in URI");
goto out;
}
pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);
qp = query_params_parse(uri->query);
if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
ret = -EINVAL;
if (g_strlcpy(cfg->vdi, uri->path + 1, SD_MAX_VDI_LEN)
>= SD_MAX_VDI_LEN) {
error_setg(&err, "VDI name is too long");
goto out;
}
if (s->is_unix) {
cfg->qp = qp = query_params_parse(uri->query);
if (is_unix) {
/* sheepdog+unix:///vdiname?socket=path */
if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
ret = -EINVAL;
if (uri->server || uri->port) {
error_setg(&err, "URI scheme %s doesn't accept a server address",
uri->scheme);
goto out;
}
s->host_spec = g_strdup(qp->p[0].value);
if (!qp->n) {
error_setg(&err,
"URI scheme %s requires query parameter 'socket'",
uri->scheme);
goto out;
}
if (qp->n != 1 || strcmp(qp->p[0].name, "socket")) {
error_setg(&err, "unexpected query parameters");
goto out;
}
cfg->path = qp->p[0].value;
} else {
/* sheepdog[+tcp]://[host:port]/vdiname */
s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
uri->port ?: SD_DEFAULT_PORT);
if (qp->n) {
error_setg(&err, "unexpected query parameters");
goto out;
}
cfg->host = uri->server;
cfg->port = uri->port;
}
/* snapshot tag */
if (uri->fragment) {
*snapid = strtoul(uri->fragment, NULL, 10);
if (*snapid == 0) {
pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment);
if (!sd_parse_snapid_or_tag(uri->fragment,
&cfg->snap_id, cfg->tag)) {
error_setg(&err, "'%s' is not a valid snapshot ID",
uri->fragment);
goto out;
}
} else {
*snapid = CURRENT_VDI_ID; /* search current vdi */
cfg->snap_id = CURRENT_VDI_ID; /* search current vdi */
}
out:
if (qp) {
query_params_free(qp);
if (err) {
error_propagate(errp, err);
sd_config_done(cfg);
}
uri_free(uri);
return ret;
}
/*
@@ -998,12 +1097,13 @@ out:
* You can run VMs outside the Sheepdog cluster by specifying
* `hostname' and `port' (experimental).
*/
static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
char *vdi, uint32_t *snapid, char *tag)
static void parse_vdiname(SheepdogConfig *cfg, const char *filename,
Error **errp)
{
Error *err = NULL;
char *p, *q, *uri;
const char *host_spec, *vdi_spec;
int nr_sep, ret;
int nr_sep;
strstart(filename, "sheepdog:", &filename);
p = q = g_strdup(filename);
@@ -1038,12 +1138,60 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
ret = sd_parse_uri(s, uri, vdi, snapid, tag);
/*
* FIXME We to escape URI meta-characters, e.g. "x?y=z"
* produces "sheepdog://x?y=z". Because of that ...
*/
sd_parse_uri(cfg, uri, &err);
if (err) {
/*
* ... this can fail, but the error message is misleading.
* Replace it by the traditional useless one until the
* escaping is fixed.
*/
error_free(err);
error_setg(errp, "Can't parse filename");
}
g_free(q);
g_free(uri);
}
return ret;
static void sd_parse_filename(const char *filename, QDict *options,
Error **errp)
{
Error *err = NULL;
SheepdogConfig cfg;
char buf[32];
if (strstr(filename, "://")) {
sd_parse_uri(&cfg, filename, &err);
} else {
parse_vdiname(&cfg, filename, &err);
}
if (err) {
error_propagate(errp, err);
return;
}
if (cfg.host) {
qdict_set_default_str(options, "host", cfg.host);
}
if (cfg.port) {
snprintf(buf, sizeof(buf), "%d", cfg.port);
qdict_set_default_str(options, "port", buf);
}
if (cfg.path) {
qdict_set_default_str(options, "path", cfg.path);
}
qdict_set_default_str(options, "vdi", cfg.vdi);
qdict_set_default_str(options, "tag", cfg.tag);
if (cfg.snap_id) {
snprintf(buf, sizeof(buf), "%d", cfg.snap_id);
qdict_set_default_str(options, "snap-id", buf);
}
sd_config_done(&cfg);
}
static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
@@ -1357,15 +1505,33 @@ static void sd_attach_aio_context(BlockDriverState *bs,
co_read_response, NULL, NULL, s);
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "sheepdog",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.name = "host",
.type = QEMU_OPT_STRING,
},
{
.name = "port",
.type = QEMU_OPT_STRING,
},
{
.name = "path",
.type = QEMU_OPT_STRING,
},
{
.name = "vdi",
.type = QEMU_OPT_STRING,
},
{
.name = "snap-id",
.type = QEMU_OPT_NUMBER,
},
{
.name = "tag",
.type = QEMU_OPT_STRING,
.help = "URL to the sheepdog image",
},
{ /* end of list */ }
},
@@ -1377,12 +1543,11 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
int ret, fd;
uint32_t vid = 0;
BDRVSheepdogState *s = bs->opaque;
char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
const char *host, *port, *path, *vdi, *snap_id_str, *tag;
uint64_t snap_id;
char *buf = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
s->bs = bs;
s->aio_context = bdrv_get_aio_context(bs);
@@ -1392,37 +1557,68 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
goto err_no_fd;
}
filename = qemu_opt_get(opts, "filename");
host = qemu_opt_get(opts, "host");
port = qemu_opt_get(opts, "port");
path = qemu_opt_get(opts, "path");
vdi = qemu_opt_get(opts, "vdi");
snap_id_str = qemu_opt_get(opts, "snap-id");
snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID);
tag = qemu_opt_get(opts, "tag");
if ((host || port) && path) {
error_setg(errp, "can't use 'path' together with 'host' or 'port'");
ret = -EINVAL;
goto err_no_fd;
}
if (!vdi) {
error_setg(errp, "parameter 'vdi' is missing");
ret = -EINVAL;
goto err_no_fd;
}
if (strlen(vdi) >= SD_MAX_VDI_LEN) {
error_setg(errp, "value of parameter 'vdi' is too long");
ret = -EINVAL;
goto err_no_fd;
}
if (snap_id > UINT32_MAX) {
snap_id = 0;
}
if (snap_id_str && !snap_id) {
error_setg(errp, "'snap-id=%s' is not a valid snapshot ID",
snap_id_str);
ret = -EINVAL;
goto err_no_fd;
}
if (!tag) {
tag = "";
}
if (tag && strlen(tag) >= SD_MAX_VDI_TAG_LEN) {
error_setg(errp, "value of parameter 'tag' is too long");
ret = -EINVAL;
goto err_no_fd;
}
s->addr = sd_socket_address(path, host, port);
QLIST_INIT(&s->inflight_aio_head);
QLIST_INIT(&s->failed_aio_head);
QLIST_INIT(&s->inflight_aiocb_head);
s->fd = -1;
memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag));
if (strstr(filename, "://")) {
ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
} else {
ret = parse_vdiname(s, filename, vdi, &snapid, tag);
}
if (ret < 0) {
error_setg(errp, "Can't parse filename");
goto out;
}
s->fd = get_sheep_fd(s, errp);
if (s->fd < 0) {
ret = s->fd;
goto out;
goto err_no_fd;
}
ret = find_vdi_name(s, vdi, snapid, tag, &vid, true, errp);
ret = find_vdi_name(s, vdi, (uint32_t)snap_id, tag, &vid, true, errp);
if (ret) {
goto out;
goto err;
}
/*
@@ -1435,7 +1631,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
}
s->discard_supported = true;
if (snapid || tag[0] != '\0') {
if (snap_id || tag[0]) {
DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = true;
}
@@ -1443,7 +1639,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
fd = connect_to_sdog(s, errp);
if (fd < 0) {
ret = fd;
goto out;
goto err;
}
buf = g_malloc(SD_INODE_SIZE);
@@ -1454,7 +1650,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
if (ret) {
error_setg(errp, "Can't read snapshot inode");
goto out;
goto err;
}
memcpy(&s->inode, buf, sizeof(s->inode));
@@ -1466,12 +1662,12 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
qemu_opts_del(opts);
g_free(buf);
return 0;
out:
err:
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
false, NULL, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
closesocket(s->fd);
err_no_fd:
qemu_opts_del(opts);
g_free(buf);
return ret;
@@ -1609,7 +1805,7 @@ static int sd_prealloc(const char *filename, Error **errp)
int ret;
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (blk == NULL) {
ret = -EIO;
goto out_with_err_set;
@@ -1685,6 +1881,7 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
}
copy = strtol(n1, NULL, 10);
/* FIXME fix error checking by switching to qemu_strtol() */
if (copy > SD_MAX_COPIES || copy < 1) {
return -EINVAL;
}
@@ -1699,6 +1896,7 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
}
parity = strtol(n2, NULL, 10);
/* FIXME fix error checking by switching to qemu_strtol() */
if (parity >= SD_EC_MAX_STRIP || parity < 1) {
return -EINVAL;
}
@@ -1737,29 +1935,34 @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
static int sd_create(const char *filename, QemuOpts *opts,
Error **errp)
{
Error *err = NULL;
int ret = 0;
uint32_t vid = 0;
char *backing_file = NULL;
char *buf = NULL;
BDRVSheepdogState *s;
char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
SheepdogConfig cfg;
uint64_t max_vdi_size;
bool prealloc = false;
s = g_new0(BDRVSheepdogState, 1);
memset(tag, 0, sizeof(tag));
if (strstr(filename, "://")) {
ret = sd_parse_uri(s, filename, s->name, &snapid, tag);
sd_parse_uri(&cfg, filename, &err);
} else {
ret = parse_vdiname(s, filename, s->name, &snapid, tag);
parse_vdiname(&cfg, filename, &err);
}
if (ret < 0) {
error_setg(errp, "Can't parse filename");
if (err) {
error_propagate(errp, err);
goto out;
}
buf = cfg.port ? g_strdup_printf("%d", cfg.port) : NULL;
s->addr = sd_socket_address(cfg.path, cfg.host, buf);
g_free(buf);
strcpy(s->name, cfg.vdi);
sd_config_done(&cfg);
s->inode.vdi_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
@@ -1829,14 +2032,12 @@ static int sd_create(const char *filename, QemuOpts *opts,
if (s->inode.block_size_shift == 0) {
SheepdogVdiReq hdr;
SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr;
Error *local_err = NULL;
int fd;
unsigned int wlen = 0, rlen = 0;
fd = connect_to_sdog(s, &local_err);
fd = connect_to_sdog(s, errp);
if (fd < 0) {
error_report_err(local_err);
ret = -EIO;
ret = fd;
goto out;
}
@@ -1922,7 +2123,7 @@ static void sd_close(BlockDriverState *bs)
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
false, NULL, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
qapi_free_SocketAddress(s->addr);
}
static int64_t sd_getlength(BlockDriverState *bs)
@@ -2367,19 +2568,16 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
BDRVSheepdogState *old_s;
char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid = 0;
int ret = 0;
int ret;
if (!sd_parse_snapid_or_tag(snapshot_id, &snapid, tag)) {
return -EINVAL;
}
old_s = g_new(BDRVSheepdogState, 1);
memcpy(old_s, s, sizeof(BDRVSheepdogState));
snapid = strtoul(snapshot_id, NULL, 10);
if (snapid) {
tag[0] = 0;
} else {
pstrcpy(tag, sizeof(tag), snapshot_id);
}
ret = reload_inode(s, snapid, tag);
if (ret) {
goto out;
@@ -2405,18 +2603,15 @@ out:
#define NR_BATCHED_DISCARD 128
static bool remove_objects(BDRVSheepdogState *s)
static int remove_objects(BDRVSheepdogState *s, Error **errp)
{
int fd, i = 0, nr_objs = 0;
Error *local_err = NULL;
int ret = 0;
bool result = true;
int ret;
SheepdogInode *inode = &s->inode;
fd = connect_to_sdog(s, &local_err);
fd = connect_to_sdog(s, errp);
if (fd < 0) {
error_report_err(local_err);
return false;
return fd;
}
nr_objs = count_data_objs(inode);
@@ -2446,15 +2641,15 @@ static bool remove_objects(BDRVSheepdogState *s)
data_vdi_id[start_idx]),
false, s->cache_flags);
if (ret < 0) {
error_report("failed to discard snapshot inode.");
result = false;
error_setg(errp, "Failed to discard snapshot inode");
goto out;
}
}
ret = 0;
out:
closesocket(fd);
return result;
return ret;
}
static int sd_snapshot_delete(BlockDriverState *bs,
@@ -2462,9 +2657,12 @@ static int sd_snapshot_delete(BlockDriverState *bs,
const char *name,
Error **errp)
{
/*
* FIXME should delete the snapshot matching both @snapshot_id and
* @name, but @name not used here
*/
unsigned long snap_id = 0;
char snap_tag[SD_MAX_VDI_TAG_LEN];
Error *local_err = NULL;
int fd, ret;
char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
BDRVSheepdogState *s = bs->opaque;
@@ -2477,15 +2675,22 @@ static int sd_snapshot_delete(BlockDriverState *bs,
};
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
if (!remove_objects(s)) {
return -1;
ret = remove_objects(s, errp);
if (ret) {
return ret;
}
memset(buf, 0, sizeof(buf));
memset(snap_tag, 0, sizeof(snap_tag));
pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
/* TODO Use sd_parse_snapid() once this mess is cleaned up */
ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
if (ret || snap_id > UINT32_MAX) {
/*
* FIXME Since qemu_strtoul() returns -EINVAL when
* @snapshot_id is null, @snapshot_id is mandatory. Correct
* would be to require at least one of @snapshot_id and @name.
*/
error_setg(errp, "Invalid snapshot ID: %s",
snapshot_id ? snapshot_id : "<null>");
return -EINVAL;
@@ -2494,40 +2699,42 @@ static int sd_snapshot_delete(BlockDriverState *bs,
if (snap_id) {
hdr.snapid = (uint32_t) snap_id;
} else {
/* FIXME I suspect we should use @name here */
/* FIXME don't truncate silently */
pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
}
ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
&local_err);
ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true, errp);
if (ret) {
return ret;
}
fd = connect_to_sdog(s, &local_err);
fd = connect_to_sdog(s, errp);
if (fd < 0) {
error_report_err(local_err);
return -1;
return fd;
}
ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
buf, &wlen, &rlen);
closesocket(fd);
if (ret) {
error_setg_errno(errp, -ret, "Couldn't send request to server");
return ret;
}
switch (rsp->result) {
case SD_RES_NO_VDI:
error_report("%s was already deleted", s->name);
error_setg(errp, "Can't find the snapshot");
return -ENOENT;
case SD_RES_SUCCESS:
break;
default:
error_report("%s, %s", sd_strerror(rsp->result), s->name);
return -1;
error_setg(errp, "%s", sd_strerror(rsp->result));
return -EIO;
}
return ret;
return 0;
}
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
@@ -2832,7 +3039,7 @@ static BlockDriver bdrv_sheepdog = {
.format_name = "sheepdog",
.protocol_name = "sheepdog",
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = sd_parse_filename,
.bdrv_file_open = sd_open,
.bdrv_reopen_prepare = sd_reopen_prepare,
.bdrv_reopen_commit = sd_reopen_commit,
@@ -2868,7 +3075,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.format_name = "sheepdog",
.protocol_name = "sheepdog+tcp",
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = sd_parse_filename,
.bdrv_file_open = sd_open,
.bdrv_reopen_prepare = sd_reopen_prepare,
.bdrv_reopen_commit = sd_reopen_commit,
@@ -2904,7 +3111,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.format_name = "sheepdog",
.protocol_name = "sheepdog+unix",
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = sd_parse_filename,
.bdrv_file_open = sd_open,
.bdrv_reopen_prepare = sd_reopen_prepare,
.bdrv_reopen_commit = sd_reopen_commit,

View File

@@ -601,7 +601,7 @@ static InetSocketAddress *ssh_config(QDict *options, Error **errp)
goto out;
}
iv = qobject_input_visitor_new(crumpled_addr, true);
iv = qobject_input_visitor_new(crumpled_addr);
visit_type_InetSocketAddress(iv, NULL, &inet, &local_error);
if (local_error) {
error_propagate(errp, local_error);

View File

@@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque)
StreamCompleteData *data = opaque;
BlockDriverState *bs = blk_bs(job->blk);
BlockDriverState *base = s->base;
Error *local_err = NULL;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
data->ret == 0) {
@@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque)
}
}
data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
bdrv_set_backing_hd(bs, base);
bdrv_set_backing_hd(bs, base, &local_err);
if (local_err) {
error_report_err(local_err);
data->ret = -EPERM;
goto out;
}
}
out:
/* Reopen the image back in read-only mode if necessary */
if (s->bs_flags != bdrv_get_flags(bs)) {
/* Give up write permissions before making it read-only */
blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
bdrv_reopen(bs, s->bs_flags, NULL);
}
@@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *iter;
int orig_bs_flags;
s = block_job_create(job_id, &stream_job_driver, bs, speed,
BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
/* Make sure that the image is opened in read-write mode */
orig_bs_flags = bdrv_get_flags(bs);
if (!(orig_bs_flags & BDRV_O_RDWR)) {
if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
block_job_unref(&s->common);
return;
}
}
/* Block all intermediate nodes between bs and base, because they
* will disappear from the chain after this operation */
/* Prevent concurrent jobs trying to modify the graph structure here, we
* already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached. */
s = block_job_create(job_id, &stream_job_driver, bs,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_GRAPH_MOD,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
goto fail;
}
/* Block all intermediate nodes between bs and base, because they will
* disappear from the chain after this operation. The streaming job reads
* every block only once, assuming that it doesn't change, so block writes
* and resizes. */
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, iter);
block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
&error_abort);
}
s->base = base;
@@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
s->on_error = on_error;
trace_stream_start(bs, base, s);
block_job_start(&s->common);
return;
fail:
if (orig_bs_flags != bdrv_get_flags(bs)) {
bdrv_reopen(bs, s->bs_flags, NULL);
}
}

View File

@@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = vdi_co_get_block_status,

View File

@@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = {
.bdrv_open = vhdx_open,
.bdrv_close = vhdx_close,
.bdrv_reopen_prepare = vhdx_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
.bdrv_create = vhdx_create,

View File

@@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
}
new_blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (new_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,

View File

@@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
&local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = {
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vpc_create,
.bdrv_co_preadv = vpc_co_preadv,

View File

@@ -1394,7 +1394,13 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
return -1;
if (s->qcow) {
int n;
if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) {
int ret;
ret = bdrv_is_allocated(s->qcow->bs, sector_num,
nb_sectors - i, &n);
if (ret < 0) {
return ret;
}
if (ret) {
DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
(int)sector_num, n));
if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
@@ -1668,7 +1674,8 @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s,
}
}
static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
static inline bool cluster_was_modified(BDRVVVFATState *s,
uint32_t cluster_num)
{
int was_modified = 0;
int i, dummy;
@@ -1683,7 +1690,13 @@ static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
1, &dummy);
}
return was_modified;
/*
* Note that this treats failures to learn allocation status the
* same as if an allocation has occurred. It's as safe as
* anything else, given that a failure to learn allocation status
* will probably result in more failures.
*/
return !!was_modified;
}
static const char* get_basename(const char* path)
@@ -1833,6 +1846,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
int res;
res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
if (res < 0) {
return -1;
}
if (!res) {
res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
if (res) {
@@ -3041,7 +3057,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
&error_abort);
*(void**) backing->opaque = s;
bdrv_set_backing_hd(s->bs, backing);
bdrv_set_backing_hd(s->bs, backing, &error_abort);
bdrv_unref(backing);
return 0;
@@ -3052,6 +3068,27 @@ err:
return ret;
}
static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
BDRVVVFATState *s = bs->opaque;
assert(c == s->qcow || role == &child_backing);
if (c == s->qcow) {
/* This is a private node, nobody should try to attach to it */
*nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
*nshared = BLK_PERM_WRITE_UNCHANGED;
} else {
/* The backing file is there so 'commit' can use it. vvfat doesn't
* access it in any way. */
*nperm = 0;
*nshared = BLK_PERM_ALL;
}
}
static void vvfat_close(BlockDriverState *bs)
{
BDRVVVFATState *s = bs->opaque;
@@ -3077,6 +3114,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_file_open = vvfat_open,
.bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
.bdrv_child_perm = vvfat_child_perm,
.bdrv_co_preadv = vvfat_co_preadv,
.bdrv_co_pwritev = vvfat_co_pwritev,

View File

@@ -52,6 +52,7 @@
#include "sysemu/arch_init.h"
#include "qemu/cutils.h"
#include "qemu/help_option.h"
#include "qemu/throttle-options.h"
static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
@@ -557,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
if ((!file || !*file) && !qdict_size(bs_opts)) {
BlockBackendRootState *blk_rs;
blk = blk_new();
blk = blk_new(0, BLK_PERM_ALL);
blk_rs = blk_get_root_state(blk);
blk_rs->open_flags = bdrv_flags;
blk_rs->read_only = read_only;
@@ -1613,6 +1614,7 @@ typedef struct ExternalSnapshotState {
BlockDriverState *old_bs;
BlockDriverState *new_bs;
AioContext *aio_context;
bool overlay_appended;
} ExternalSnapshotState;
static void external_snapshot_prepare(BlkActionState *common,
@@ -1767,7 +1769,19 @@ static void external_snapshot_prepare(BlkActionState *common,
if (!state->new_bs->drv->supports_backing) {
error_setg(errp, "The snapshot does not support backing images");
return;
}
/* This removes our old bs and adds the new bs. This is an operation that
* can fail, so we need to do it in .prepare; undoing it for abort is
* always possible. */
bdrv_ref(state->new_bs);
bdrv_append(state->new_bs, state->old_bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
state->overlay_appended = true;
}
static void external_snapshot_commit(BlkActionState *common)
@@ -1777,8 +1791,6 @@ static void external_snapshot_commit(BlkActionState *common)
bdrv_set_aio_context(state->new_bs, state->aio_context);
/* This removes our old bs and adds the new bs */
bdrv_append(state->new_bs, state->old_bs);
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
@@ -1793,7 +1805,9 @@ static void external_snapshot_abort(BlkActionState *common)
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
if (state->new_bs) {
bdrv_unref(state->new_bs);
if (state->overlay_appended) {
bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
}
}
}
@@ -1804,6 +1818,7 @@ static void external_snapshot_clean(BlkActionState *common)
if (state->aio_context) {
bdrv_drained_end(state->old_bs);
aio_context_release(state->aio_context);
bdrv_unref(state->new_bs);
}
}
@@ -2310,7 +2325,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
}
if (!locked || force) {
blk_dev_change_media_cb(blk, false);
blk_dev_change_media_cb(blk, false, &error_abort);
}
if (locked && !force) {
@@ -2348,6 +2363,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
Error **errp)
{
BlockBackend *blk;
Error *local_err = NULL;
device = has_device ? device : NULL;
id = has_id ? id : NULL;
@@ -2371,7 +2387,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
return;
}
blk_dev_change_media_cb(blk, true);
blk_dev_change_media_cb(blk, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
@@ -2424,7 +2444,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
* called at all); therefore, the medium needs to be ejected here.
* Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. false). */
blk_dev_change_media_cb(blk, false);
blk_dev_change_media_cb(blk, false, &error_abort);
}
out:
@@ -2434,7 +2454,9 @@ out:
static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
BlockDriverState *bs, Error **errp)
{
Error *local_err = NULL;
bool has_device;
int ret;
/* For BBs without a device, we can exchange the BDS tree at will */
has_device = blk_get_attached_dev(blk);
@@ -2454,7 +2476,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
return;
}
blk_insert_bs(blk, bs);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
return;
}
if (!blk_dev_has_tray(blk)) {
/* For tray-less devices, blockdev-close-tray is a no-op (or may not be
@@ -2462,7 +2487,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
* slot here.
* Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. true). */
blk_dev_change_media_cb(blk, true);
blk_dev_change_media_cb(blk, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
blk_remove_bs(blk);
return;
}
}
}
@@ -2889,8 +2919,11 @@ void qmp_block_resize(bool has_device, const char *device,
goto out;
}
blk = blk_new();
blk_insert_bs(blk, bs);
blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
goto out;
}
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
@@ -3013,6 +3046,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_top, const char *top,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
bool has_filter_node_name, const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3028,6 +3062,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
if (!has_speed) {
speed = 0;
}
if (!has_filter_node_name) {
filter_node_name = NULL;
}
/* Important Note:
* libvirt relies on the DeviceNotFound error class in order to probe for
@@ -3102,8 +3139,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
goto out;
}
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
&local_err, false);
BLOCK_JOB_DEFAULT, speed, on_error,
filter_node_name, NULL, NULL, &local_err, false);
} else {
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3111,7 +3148,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
}
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
on_error, has_backing_file ? backing_file : NULL,
&local_err);
filter_node_name, &local_err);
}
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -3347,6 +3384,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_unmap, bool unmap,
bool has_filter_node_name,
const char *filter_node_name,
Error **errp)
{
@@ -3368,6 +3407,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_unmap) {
unmap = true;
}
if (!has_filter_node_name) {
filter_node_name = NULL;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3397,7 +3439,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
mirror_start(job_id, bs, target,
has_replaces ? replaces : NULL,
speed, granularity, buf_size, sync, backing_mode,
on_source_error, on_target_error, unmap, errp);
on_source_error, on_target_error, unmap, filter_node_name,
errp);
}
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3535,6 +3578,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_on_source_error, arg->on_source_error,
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
false, NULL,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@@ -3553,6 +3597,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_filter_node_name,
const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3584,6 +3630,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
true, true,
has_filter_node_name, filter_node_name,
&local_err);
error_propagate(errp, local_err);
@@ -4007,83 +4054,11 @@ QemuOptsList qemu_common_drive_opts = {
.name = BDRV_OPT_READ_ONLY,
.type = QEMU_OPT_BOOL,
.help = "open drive file as read-only",
},{
.name = "throttling.iops-total",
.type = QEMU_OPT_NUMBER,
.help = "limit total I/O operations per second",
},{
.name = "throttling.iops-read",
.type = QEMU_OPT_NUMBER,
.help = "limit read operations per second",
},{
.name = "throttling.iops-write",
.type = QEMU_OPT_NUMBER,
.help = "limit write operations per second",
},{
.name = "throttling.bps-total",
.type = QEMU_OPT_NUMBER,
.help = "limit total bytes per second",
},{
.name = "throttling.bps-read",
.type = QEMU_OPT_NUMBER,
.help = "limit read bytes per second",
},{
.name = "throttling.bps-write",
.type = QEMU_OPT_NUMBER,
.help = "limit write bytes per second",
},{
.name = "throttling.iops-total-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations burst",
},{
.name = "throttling.iops-read-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations read burst",
},{
.name = "throttling.iops-write-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations write burst",
},{
.name = "throttling.bps-total-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes burst",
},{
.name = "throttling.bps-read-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes read burst",
},{
.name = "throttling.bps-write-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes write burst",
},{
.name = "throttling.iops-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-total-max burst period, in seconds",
},{
.name = "throttling.iops-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-read-max burst period, in seconds",
},{
.name = "throttling.iops-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-write-max burst period, in seconds",
},{
.name = "throttling.bps-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-total-max burst period, in seconds",
},{
.name = "throttling.bps-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-read-max burst period, in seconds",
},{
.name = "throttling.bps-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-write-max burst period, in seconds",
},{
.name = "throttling.iops-size",
.type = QEMU_OPT_NUMBER,
.help = "when limiting by iops max size of an I/O in bytes",
},{
},
THROTTLE_OPTS,
{
.name = "throttling.group",
.type = QEMU_OPT_STRING,
.help = "name of the block throttling group",

View File

@@ -55,6 +55,19 @@ struct BlockJobTxn {
static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
static char *child_job_get_parent_desc(BdrvChild *c)
{
BlockJob *job = c->opaque;
return g_strdup_printf("%s job '%s'",
BlockJobType_lookup[job->driver->job_type],
job->id);
}
static const BdrvChildRole child_job = {
.get_parent_desc = child_job_get_parent_desc,
.stay_at_node = true,
};
BlockJob *block_job_next(BlockJob *job)
{
if (!job) {
@@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque)
block_job_unref(job);
}
void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
void block_job_remove_all_bdrv(BlockJob *job)
{
job->nodes = g_slist_prepend(job->nodes, bs);
GSList *l;
for (l = job->nodes; l; l = l->next) {
BdrvChild *c = l->data;
bdrv_op_unblock_all(c->bs, job->blocker);
bdrv_root_unref_child(c);
}
g_slist_free(job->nodes);
job->nodes = NULL;
}
int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
uint64_t perm, uint64_t shared_perm, Error **errp)
{
BdrvChild *c;
c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
job, errp);
if (c == NULL) {
return -EPERM;
}
job->nodes = g_slist_prepend(job->nodes, c);
bdrv_ref(bs);
bdrv_op_block_all(bs, job->blocker);
return 0;
}
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
BlockDriverState *bs, int64_t speed, int flags,
BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
BlockBackend *blk;
BlockJob *job;
int ret;
if (bs->job) {
error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
@@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
}
}
blk = blk_new();
blk_insert_bs(blk, bs);
blk = blk_new(perm, shared_perm);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
blk_unref(blk);
return NULL;
}
job = g_malloc0(driver->instance_size);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
block_job_add_bdrv(job, bs);
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
@@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job)
void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
GSList *l;
BlockDriverState *bs = blk_bs(job->blk);
bs->job = NULL;
for (l = job->nodes; l; l = l->next) {
bs = l->data;
bdrv_op_unblock_all(bs, job->blocker);
bdrv_unref(bs);
}
g_slist_free(job->nodes);
block_job_remove_all_bdrv(job);
blk_remove_aio_context_notifier(job->blk,
block_job_attached_aio_context,
block_job_detach_aio_context, job);

View File

@@ -58,7 +58,7 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
ret = qio_channel_read(
chan, (gchar *)buf, len, NULL);
if (ret == 0) {
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
return FALSE;
}
@@ -89,7 +89,7 @@ static void fd_chr_update_read_handler(Chardev *chr,
{
FDChardev *s = FD_CHARDEV(chr);
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
if (s->ioc_in) {
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc_in,
fd_chr_read_poll,
@@ -103,7 +103,7 @@ static void char_fd_finalize(Object *obj)
Chardev *chr = CHARDEV(obj);
FDChardev *s = FD_CHARDEV(obj);
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
if (s->ioc_in) {
object_unref(OBJECT(s->ioc_in));
}

View File

@@ -127,14 +127,14 @@ guint io_add_watch_poll(Chardev *chr,
return tag;
}
static void io_remove_watch_poll(guint tag)
static void io_remove_watch_poll(guint tag, GMainContext *context)
{
GSource *source;
IOWatchPoll *iwp;
g_return_if_fail(tag > 0);
source = g_main_context_find_source_by_id(NULL, tag);
source = g_main_context_find_source_by_id(context, tag);
g_return_if_fail(source != NULL);
iwp = io_watch_poll_from_source(source);
@@ -146,10 +146,10 @@ static void io_remove_watch_poll(guint tag)
g_source_destroy(&iwp->parent);
}
void remove_fd_in_watch(Chardev *chr)
void remove_fd_in_watch(Chardev *chr, GMainContext *context)
{
if (chr->fd_in_tag) {
io_remove_watch_poll(chr->fd_in_tag);
io_remove_watch_poll(chr->fd_in_tag, context);
chr->fd_in_tag = 0;
}
}

View File

@@ -36,7 +36,7 @@ guint io_add_watch_poll(Chardev *chr,
gpointer user_data,
GMainContext *context);
void remove_fd_in_watch(Chardev *chr);
void remove_fd_in_watch(Chardev *chr, GMainContext *context);
int io_channel_send(QIOChannel *ioc, const void *buf, size_t len);

View File

@@ -199,7 +199,7 @@ static void pty_chr_state(Chardev *chr, int connected)
g_source_remove(s->open_tag);
s->open_tag = 0;
}
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
s->connected = 0;
/* (re-)connect poll interval for idle guests: once per second.
* We check more frequently in case the guests sends data to

View File

@@ -328,7 +328,7 @@ static void tcp_chr_free_connection(Chardev *chr)
}
tcp_set_msgfds(chr, NULL, 0);
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
object_unref(OBJECT(s->sioc));
s->sioc = NULL;
object_unref(OBJECT(s->ioc));
@@ -498,7 +498,7 @@ static void tcp_chr_update_read_handler(Chardev *chr,
return;
}
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
if (s->ioc) {
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
tcp_chr_read_poll,

View File

@@ -81,7 +81,7 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
ret = qio_channel_read(
s->ioc, (char *)s->buf, sizeof(s->buf), NULL);
if (ret <= 0) {
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
return FALSE;
}
s->bufcnt = ret;
@@ -101,7 +101,7 @@ static void udp_chr_update_read_handler(Chardev *chr,
{
UdpChardev *s = UDP_CHARDEV(chr);
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
if (s->ioc) {
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
udp_chr_read_poll,
@@ -115,7 +115,7 @@ static void char_udp_finalize(Object *obj)
Chardev *chr = CHARDEV(obj);
UdpChardev *s = UDP_CHARDEV(obj);
remove_fd_in_watch(chr);
remove_fd_in_watch(chr, NULL);
if (s->ioc) {
object_unref(OBJECT(s->ioc));
}

View File

@@ -560,7 +560,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
cc = CHARDEV_GET_CLASS(s);
if (!opaque && !fd_can_read && !fd_read && !fd_event) {
fe_open = 0;
remove_fd_in_watch(s);
remove_fd_in_watch(s, context);
} else {
fe_open = 1;
}

74
configure vendored
View File

@@ -301,7 +301,6 @@ glusterfs=""
glusterfs_xlator_opt="no"
glusterfs_discard="no"
glusterfs_zerofill="no"
archipelago="no"
gtk=""
gtkabi=""
gtk_gl="no"
@@ -1101,10 +1100,6 @@ for opt do
;;
--enable-glusterfs) glusterfs="yes"
;;
--disable-archipelago) archipelago="no"
;;
--enable-archipelago) archipelago="yes"
;;
--disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2
;;
@@ -1335,6 +1330,12 @@ Advanced options (experts only):
--with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent
--with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb)
--tls-priority default TLS protocol/cipher priority string
--enable-gprof QEMU profiling with gprof
--enable-profiler profiler support
--enable-xen-pv-domain-build
xen pv domain builder
--enable-debug-stack-usage
track the maximum stack usage of stacks created by qemu_alloc_stack
Optional features, enabled with --enable-FEATURE and
disabled with --disable-FEATURE, default is enabled if available:
@@ -1396,13 +1397,18 @@ disabled with --disable-FEATURE, default is enabled if available:
seccomp seccomp support
coroutine-pool coroutine freelist (better performance)
glusterfs GlusterFS backend
archipelago Archipelago backend
tpm TPM support
libssh2 ssh block device support
numa libnuma support
tcmalloc tcmalloc support
jemalloc jemalloc support
replication replication support
vhost-vsock virtio sockets device support
opengl opengl support
virglrenderer virgl rendering support
xfsctl xfsctl support
qom-cast-debug cast debugging support
tools build qemu-io, qemu-nbd and qemu-image tools
NOTE: The object files are built at the place where configure is launched
EOF
@@ -3466,37 +3472,6 @@ EOF
fi
fi
##########################################
# archipelago probe
if test "$archipelago" != "no" ; then
cat > $TMPC <<EOF
#include <stdio.h>
#include <xseg/xseg.h>
#include <xseg/protocol.h>
int main(void) {
xseg_initialize();
return 0;
}
EOF
archipelago_libs=-lxseg
if compile_prog "" "$archipelago_libs"; then
archipelago="yes"
libs_tools="$archipelago_libs $libs_tools"
libs_softmmu="$archipelago_libs $libs_softmmu"
echo "WARNING: Please check the licenses of QEMU and libxseg carefully."
echo "GPLv3 versions of libxseg may not be compatible with QEMU's"
echo "license and therefore prevent redistribution."
echo
echo "To disable Archipelago, use --disable-archipelago"
else
if test "$archipelago" = "yes" ; then
feature_not_found "Archipelago backend support" "Install libxseg devel"
fi
archipelago="no"
fi
fi
##########################################
# glusterfs probe
@@ -4747,6 +4722,20 @@ if test "$modules" = "yes" && test "$LD_REL_FLAGS" = ""; then
feature_not_found "modules" "Cannot find how to build relocatable objects"
fi
##########################################
# check for sysmacros.h
have_sysmacros=no
cat > $TMPC << EOF
#include <sys/sysmacros.h>
int main(void) {
return makedev(0, 0);
}
EOF
if compile_prog "" "" ; then
have_sysmacros=yes
fi
##########################################
# End of CC checks
# After here, no more $cc or $ld runs
@@ -5099,7 +5088,6 @@ echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
echo "debug stack usage $debug_stack_usage"
echo "GlusterFS support $glusterfs"
echo "Archipelago support $archipelago"
echo "gcov $gcov_tool"
echo "gcov enabled $gcov"
echo "TPM support $tpm"
@@ -5640,11 +5628,6 @@ if test "$glusterfs_zerofill" = "yes" ; then
echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
fi
if test "$archipelago" = "yes" ; then
echo "CONFIG_ARCHIPELAGO=m" >> $config_host_mak
echo "ARCHIPELAGO_LIBS=$archipelago_libs" >> $config_host_mak
fi
if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=m" >> $config_host_mak
echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
@@ -5719,6 +5702,10 @@ if test "$have_af_vsock" = "yes" ; then
echo "CONFIG_AF_VSOCK=y" >> $config_host_mak
fi
if test "$have_sysmacros" = "yes" ; then
echo "CONFIG_SYSMACROS=y" >> $config_host_mak
fi
# Hold two types of flag:
# CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on
# a thread we have a handle to
@@ -5894,6 +5881,7 @@ case "$target_name" in
TARGET_BASE_ARCH=i386
;;
alpha)
mttcg="yes"
;;
arm|armeb)
TARGET_ARCH=arm

View File

@@ -33,6 +33,7 @@
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
#include "sysemu/cpus.h"
#include "sysemu/replay.h"
/* -icount align implementation. */
@@ -186,12 +187,6 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
cc->set_pc(cpu, last_tb->pc);
}
}
if (tb_exit == TB_EXIT_REQUESTED) {
/* We were asked to stop executing TBs (probably a pending
* interrupt. We've now stopped, so clear the flag.
*/
atomic_set(&cpu->tcg_exit_req, 0);
}
return ret;
}
@@ -560,8 +555,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
qemu_mutex_unlock_iothread();
}
if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
/* Finally, check if we need to exit to the main loop. */
if (unlikely(atomic_read(&cpu->exit_request)
|| (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) {
atomic_set(&cpu->exit_request, 0);
cpu->exception_index = EXCP_INTERRUPT;
return true;
@@ -571,62 +567,54 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
}
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
TranslationBlock **last_tb, int *tb_exit,
SyncClocks *sc)
TranslationBlock **last_tb, int *tb_exit)
{
uintptr_t ret;
if (unlikely(atomic_read(&cpu->exit_request))) {
return;
}
int32_t insns_left;
trace_exec_tb(tb, tb->pc);
ret = cpu_tb_exec(cpu, tb);
tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
*tb_exit = ret & TB_EXIT_MASK;
switch (*tb_exit) {
case TB_EXIT_REQUESTED:
if (*tb_exit != TB_EXIT_REQUESTED) {
*last_tb = tb;
return;
}
*last_tb = NULL;
insns_left = atomic_read(&cpu->icount_decr.u32);
atomic_set(&cpu->icount_decr.u16.high, 0);
if (insns_left < 0) {
/* Something asked us to stop executing chained TBs; just
* continue round the main loop. Whatever requested the exit
* will also have set something else (eg interrupt_request)
* which we will handle next time around the loop. But we
* need to ensure the tcg_exit_req read in generated code
* comes before the next read of cpu->exit_request or
* cpu->interrupt_request.
* will also have set something else (eg exit_request or
* interrupt_request) which we will handle next time around
* the loop. But we need to ensure the zeroing of icount_decr
* comes before the next read of cpu->exit_request
* or cpu->interrupt_request.
*/
smp_mb();
*last_tb = NULL;
break;
case TB_EXIT_ICOUNT_EXPIRED:
{
/* Instruction counter expired. */
#ifdef CONFIG_USER_ONLY
abort();
#else
int insns_left = cpu->icount_decr.u32;
*last_tb = NULL;
if (cpu->icount_extra && insns_left >= 0) {
/* Refill decrementer and continue execution. */
cpu->icount_extra += insns_left;
insns_left = MIN(0xffff, cpu->icount_extra);
cpu->icount_extra -= insns_left;
cpu->icount_decr.u16.low = insns_left;
} else {
if (insns_left > 0) {
/* Execute remaining instructions. */
cpu_exec_nocache(cpu, insns_left, tb, false);
align_clocks(sc, cpu);
}
cpu->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cpu);
return;
}
/* Instruction counter expired. */
assert(use_icount);
#ifndef CONFIG_USER_ONLY
if (cpu->icount_extra) {
/* Refill decrementer and continue execution. */
cpu->icount_extra += insns_left;
insns_left = MIN(0xffff, cpu->icount_extra);
cpu->icount_extra -= insns_left;
cpu->icount_decr.u16.low = insns_left;
} else {
/* Execute any remaining instructions, then let the main loop
* handle the next event.
*/
if (insns_left > 0) {
cpu_exec_nocache(cpu, insns_left, tb, false);
}
break;
}
#endif
}
default:
*last_tb = tb;
break;
}
}
/* main execution loop */
@@ -635,7 +623,7 @@ int cpu_exec(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
int ret;
SyncClocks sc;
SyncClocks sc = { 0 };
/* replay_interrupt may need current_cpu */
current_cpu = cpu;
@@ -683,7 +671,7 @@ int cpu_exec(CPUState *cpu)
while (!cpu_handle_interrupt(cpu, &last_tb)) {
TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit);
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);

144
cpus.c
View File

@@ -51,10 +51,6 @@
#include "hw/nmi.h"
#include "sysemu/replay.h"
#ifndef _WIN32
#include "qemu/compatfd.h"
#endif
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
@@ -185,10 +181,7 @@ static bool check_tcg_memory_orders_compatible(void)
static bool default_mttcg_enabled(void)
{
QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
const char *rr = qemu_opt_get(icount_opts, "rr");
if (rr || TCG_OVERSIZED_GUEST) {
if (use_icount || TCG_OVERSIZED_GUEST) {
return false;
} else {
#ifdef TARGET_SUPPORTS_MTTCG
@@ -206,7 +199,13 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
if (strcmp(t, "multi") == 0) {
if (TCG_OVERSIZED_GUEST) {
error_setg(errp, "No MTTCG when guest word size > hosts");
} else if (use_icount) {
error_setg(errp, "No MTTCG when icount is enabled");
} else {
#ifndef TARGET_SUPPORT_MTTCG
error_report("Guest not yet converted to MTTCG - "
"you may get unexpected results");
#endif
if (!check_tcg_memory_orders_compatible()) {
error_report("Guest expects a stronger memory ordering "
"than the host provides");
@@ -801,6 +800,27 @@ static void qemu_cpu_kick_rr_cpu(void)
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
qemu_notify_event();
return;
}
if (!qemu_in_vcpu_thread() && first_cpu) {
/* qemu_cpu_kick is not enough to kick a halted CPU out of
* qemu_tcg_wait_io_event. async_run_on_cpu, instead,
* causes cpu_thread_is_idle to return false. This way,
* handle_icount_deadline can run.
*/
async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
}
}
static void kick_tcg_thread(void *opaque)
{
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
@@ -924,13 +944,23 @@ static void sigbus_reraise(void)
abort();
}
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
void *ctx)
static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
if (kvm_on_sigbus(siginfo->ssi_code,
(void *)(intptr_t)siginfo->ssi_addr)) {
if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
sigbus_reraise();
}
if (current_cpu) {
/* Called asynchronously in VCPU thread. */
if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
} else {
/* Called synchronously (via signalfd) in main thread. */
if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
}
}
static void qemu_init_sigbus(void)
@@ -939,92 +969,17 @@ static void qemu_init_sigbus(void)
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
action.sa_sigaction = sigbus_handler;
sigaction(SIGBUS, &action, NULL);
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
static void qemu_kvm_eat_signals(CPUState *cpu)
{
struct timespec ts = { 0, 0 };
siginfo_t siginfo;
sigset_t waitset;
sigset_t chkset;
int r;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
sigaddset(&waitset, SIGBUS);
do {
r = sigtimedwait(&waitset, &siginfo, &ts);
if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
perror("sigtimedwait");
exit(1);
}
switch (r) {
case SIGBUS:
if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
sigbus_reraise();
}
break;
default:
break;
}
r = sigpending(&chkset);
if (r == -1) {
perror("sigpending");
exit(1);
}
} while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */
#ifndef _WIN32
static void dummy_signal(int sig)
{
}
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
int r;
sigset_t set;
struct sigaction sigact;
memset(&sigact, 0, sizeof(sigact));
sigact.sa_handler = dummy_signal;
sigaction(SIG_IPI, &sigact, NULL);
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
sigdelset(&set, SIGBUS);
r = kvm_set_signal_mask(cpu, &set);
if (r) {
fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
exit(1);
}
}
#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
abort();
}
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuThread io_thread;
@@ -1099,7 +1054,6 @@ static void qemu_kvm_wait_io_event(CPUState *cpu)
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
qemu_kvm_eat_signals(cpu);
qemu_wait_io_event_common(cpu);
}
@@ -1122,7 +1076,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
exit(1);
}
qemu_kvm_init_cpu_signals(cpu);
kvm_init_cpu_signals(cpu);
/* signal CPU creation */
cpu->created = true;
@@ -1212,12 +1166,15 @@ static int64_t tcg_get_icount_limit(void)
static void handle_icount_deadline(void)
{
assert(qemu_in_vcpu_thread());
if (use_icount) {
int64_t deadline =
qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline == 0) {
/* Wake up other AioContexts. */
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
}
}
}
@@ -1330,6 +1287,11 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
/* Run the timers here. This is much more efficient than
* waking up the I/O thread and waiting for completion.
*/
handle_icount_deadline();
if (!cpu) {
cpu = first_cpu;
}
@@ -1371,8 +1333,6 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
atomic_mb_set(&cpu->exit_request, 0);
}
handle_icount_deadline();
qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
deal_with_unplugged_cpus();
}

View File

@@ -769,14 +769,13 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
if (memory_region_is_unassigned(mr)) {
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->do_unassigned_access) {
cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
} else {
report_bad_exec(cpu, addr);
exit(1);
}
cpu_unassigned_access(cpu, addr, false, true, 0, 4);
/* The CPU's unassigned access hook might have longjumped out
* with an exception. If it didn't (or there was no hook) then
* we can't proceed further.
*/
report_bad_exec(cpu, addr);
exit(1);
}
p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
return qemu_ram_addr_from_host_nofail(p);

View File

@@ -63,18 +63,14 @@ static bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = {
size_t qcrypto_cipher_get_block_len(QCryptoCipherAlgorithm alg)
{
if (alg >= G_N_ELEMENTS(alg_key_len)) {
return 0;
}
assert(alg < G_N_ELEMENTS(alg_key_len));
return alg_block_len[alg];
}
size_t qcrypto_cipher_get_key_len(QCryptoCipherAlgorithm alg)
{
if (alg >= G_N_ELEMENTS(alg_key_len)) {
return 0;
}
assert(alg < G_N_ELEMENTS(alg_key_len));
return alg_key_len[alg];
}

View File

@@ -48,6 +48,7 @@ static int qcrypto_ivgen_essiv_init(QCryptoIVGen *ivgen,
&salt, &nhash,
errp) < 0) {
g_free(essiv);
g_free(salt);
return -1;
}

View File

@@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y
CONFIG_A9MPCORE=y
CONFIG_A15MPCORE=y
CONFIG_ARM_V7M=y
CONFIG_ARM_GIC=y
CONFIG_ARM_GIC_KVM=$(CONFIG_KVM)
CONFIG_ARM_TIMER=y

View File

@@ -59,3 +59,4 @@ CONFIG_I82801B11=y
CONFIG_SMBIOS=y
CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
CONFIG_PXB=y
CONFIG_ACPI_VMGENID=y

View File

@@ -59,3 +59,4 @@ CONFIG_I82801B11=y
CONFIG_SMBIOS=y
CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
CONFIG_PXB=y
CONFIG_ACPI_VMGENID=y

View File

@@ -3901,9 +3901,9 @@ print_insn_arm (bfd_vma pc, struct disassemble_info *info)
status = info->read_memory_func (pc, (bfd_byte *)b, 4, info);
if (little)
given = (b[0]) | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
given = (b[0]) | (b[1] << 8) | (b[2] << 16) | ((unsigned)b[3] << 24);
else
given = (b[3]) | (b[2] << 8) | (b[1] << 16) | (b[0] << 24);
given = (b[3]) | (b[2] << 8) | (b[1] << 16) | ((unsigned)b[0] << 24);
}
else
{

View File

@@ -2009,7 +2009,7 @@ print_with_operands (const struct cris_opcode *opcodep,
case 'n':
{
/* Like N but pc-relative to the start of the insn. */
unsigned long number
uint32_t number
= (buffer[2] + buffer[3] * 256 + buffer[4] * 65536
+ buffer[5] * 0x1000000 + addr);
@@ -2201,7 +2201,7 @@ print_with_operands (const struct cris_opcode *opcodep,
{
/* It's [pc+]. This cannot possibly be anything
but an address. */
unsigned long number
uint32_t number
= prefix_buffer[2] + prefix_buffer[3] * 256
+ prefix_buffer[4] * 65536
+ prefix_buffer[5] * 0x1000000;

View File

@@ -1788,8 +1788,7 @@ fput_fp_reg_r (unsigned reg, disassemble_info *info)
if (reg < 4)
(*info->fprintf_func) (info->stream, "fpe%d", reg * 2 + 1);
else
(*info->fprintf_func) (info->stream, "%sR",
reg ? fp_reg_names[reg] : "fr0");
(*info->fprintf_func) (info->stream, "%sR", fp_reg_names[reg]);
}
static void

View File

@@ -4043,7 +4043,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
}
}
if (putop (dp->name, sizeflag) == 0)
if (dp->name != NULL && putop (dp->name, sizeflag) == 0)
{
for (i = 0; i < MAX_OPERANDS; ++i)
{

View File

@@ -4685,10 +4685,11 @@ get_field (const unsigned char *data, enum floatformat_byteorders order,
/* This is the last byte; zero out the bits which are not part of
this field. */
result |=
(*(data + cur_byte) & ((1 << (len - cur_bitshift)) - 1))
(unsigned long)(*(data + cur_byte)
& ((1 << (len - cur_bitshift)) - 1))
<< cur_bitshift;
else
result |= *(data + cur_byte) << cur_bitshift;
result |= (unsigned long)*(data + cur_byte) << cur_bitshift;
cur_bitshift += FLOATFORMAT_CHAR_BIT;
if (order == floatformat_little)
++cur_byte;

View File

@@ -748,9 +748,11 @@ read_insn_microblaze (bfd_vma memaddr,
}
if (info->endian == BFD_ENDIAN_BIG)
inst = (ibytes[0] << 24) | (ibytes[1] << 16) | (ibytes[2] << 8) | ibytes[3];
inst = ((unsigned)ibytes[0] << 24) | (ibytes[1] << 16)
| (ibytes[2] << 8) | ibytes[3];
else if (info->endian == BFD_ENDIAN_LITTLE)
inst = (ibytes[3] << 24) | (ibytes[2] << 16) | (ibytes[1] << 8) | ibytes[0];
inst = ((unsigned)ibytes[3] << 24) | (ibytes[2] << 16)
| (ibytes[1] << 8) | ibytes[0];
else
abort ();

View File

@@ -41,3 +41,12 @@ has three bootable devices target1, target3, target5 connected to it,
the option ROM will have a boot method for each of them, but it is not
possible to map from boot method back to a specific target. This is a
shortcoming of the PC BIOS boot specification.
== Mixing bootindex and boot order parameters ==
Note that it does not make sense to use the bootindex property together
with the "-boot order=..." (or "-boot once=...") parameter. The guest
firmware implementations normally either support the one or the other,
but not both parameters at the same time. Mixing them will result in
undefined behavior, and thus the guest firmware will likely not boot
from the expected devices.

View File

@@ -0,0 +1,281 @@
# mach-virt - VirtIO guest (graphical console)
# =========================================================
#
# Usage:
#
# $ qemu-system-aarch64 \
# -nodefaults \
# -readconfig mach-virt-graphical.cfg \
# -cpu host
#
# You will probably need to tweak the lines marked as
# CHANGE ME before being able to use this configuration!
#
# The guest will have a selection of VirtIO devices
# tailored towards optimal performance with modern guests,
# and will be accessed through a graphical console.
#
# ---------------------------------------------------------
#
# Using -nodefaults is required to have full control over
# the virtual hardware: when it's specified, QEMU will
# populate the board with only the builtin peripherals,
# such as the PL011 UART, plus a PCI Express Root Bus; the
# user will then have to explicitly add further devices.
#
# The PCI Express Root Bus shows up in the guest as:
#
# 00:00.0 Host bridge
#
# This configuration file adds a number of other useful
# devices, more specifically:
#
# 00:01.0 Display controller
# 00.1c.* PCI bridge (PCI Express Root Ports)
# 01:00.0 SCSI storage controller
# 02:00.0 Ethernet controller
# 03:00.0 USB controller
#
# More information about these devices is available below.
# Machine options
# =========================================================
#
# We use the virt machine type and enable KVM acceleration
# for better performance.
#
# Using less than 1 GiB of memory is probably not going to
# yield good performance in the guest, and might even lead
# to obscure boot issues in some cases.
#
# Unfortunately, there is no way to configure the CPU model
# in this file, so it will have to be provided on the
# command line, but we can configure the guest to use the
# same GIC version as the host.
[machine]
type = "virt"
accel = "kvm"
gic-version = "host"
[memory]
size = "1024"
# Firmware configuration
# =========================================================
#
# There are two parts to the firmware: a read-only image
# containing the executable code, which is shared between
# guests, and a read/write variable store that is owned
# by one specific guest, exclusively, and is used to
# record information such as the UEFI boot order.
#
# For any new guest, its permanent, private variable store
# should initially be copied from the template file
# provided along with the firmware binary.
#
# Depending on the OS distribution you're using on the
# host, the name of the package containing the firmware
# binary and variable store template, as well as the paths
# to the files themselves, will be different. For example:
#
# Fedora
# edk2-aarch64 (pkg)
# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin)
# /usr/share/edk2/aarch64/vars-template-pflash.raw (var)
#
# RHEL
# AAVMF (pkg)
# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
# /usr/share/AAVMF/AAVMF_VARS.fd (var)
#
# Debian/Ubuntu
# qemu-efi (pkg)
# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
# /usr/share/AAVMF/AAVMF_VARS.fd (var)
[drive "uefi-binary"]
file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME
format = "raw"
if = "pflash"
unit = "0"
readonly = "on"
[drive "uefi-varstore"]
file = "guest_VARS.fd" # CHANGE ME
format = "raw"
if = "pflash"
unit = "1"
# PCI bridge (PCI Express Root Ports)
# =========================================================
#
# We create eight PCI Express Root Ports, and we plug them
# all into separate functions of the same slot. Some of
# them will be used by devices, the rest will remain
# available for hotplug.
[device "pcie.1"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.0"
port = "1"
chassis = "1"
multifunction = "on"
[device "pcie.2"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.1"
port = "2"
chassis = "2"
[device "pcie.3"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.2"
port = "3"
chassis = "3"
[device "pcie.4"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.3"
port = "4"
chassis = "4"
[device "pcie.5"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.4"
port = "5"
chassis = "5"
[device "pcie.6"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.5"
port = "6"
chassis = "6"
[device "pcie.7"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.6"
port = "7"
chassis = "7"
[device "pcie.8"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.7"
port = "8"
chassis = "8"
# SCSI storage controller (and storage)
# =========================================================
#
# We use virtio-scsi here so that we can (hot)plug a large
# number of disks without running into issues; a SCSI disk,
# backed by a qcow2 disk image on the host's filesystem, is
# attached to it.
#
# We also create an optical disk, mostly for installation
# purposes: once the guest OS has been succesfully
# installed, the guest will no longer boot from optical
# media. If you don't want, or no longer want, to have an
# optical disk in the guest you can safely comment out
# all relevant sections below.
[device "scsi"]
driver = "virtio-scsi-pci"
bus = "pcie.1"
addr = "00.0"
[device "scsi-disk"]
driver = "scsi-hd"
bus = "scsi.0"
drive = "disk"
bootindex = "1"
[drive "disk"]
file = "guest.qcow2" # CHANGE ME
format = "qcow2"
if = "none"
[device "scsi-optical-disk"]
driver = "scsi-cd"
bus = "scsi.0"
drive = "optical-disk"
bootindex = "2"
[drive "optical-disk"]
file = "install.iso" # CHANGE ME
format = "raw"
if = "none"
# Ethernet controller
# =========================================================
#
# We use virtio-net for improved performance over emulated
# hardware; on the host side, we take advantage of user
# networking so that the QEMU process doesn't require any
# additional privileges.
[netdev "hostnet"]
type = "user"
[device "net"]
driver = "virtio-net-pci"
netdev = "hostnet"
bus = "pcie.2"
addr = "00.0"
# USB controller (and input devices)
# =========================================================
#
# We add a virtualization-friendly USB 3.0 controller and
# a USB keyboard / USB tablet combo so that graphical
# guests can be controlled appropriately.
[device "usb"]
driver = "nec-usb-xhci"
bus = "pcie.3"
addr = "00.0"
[device "keyboard"]
driver = "usb-kbd"
bus = "usb.0"
[device "tablet"]
driver = "usb-tablet"
bus = "usb.0"
# Display controller
# =========================================================
#
# We use virtio-gpu because the legacy VGA framebuffer is
# very troublesome on aarch64, and virtio-gpu is the only
# video device that doesn't implement it.
#
# If you're running the guest on a remote, potentially
# headless host, you will probably want to append something
# like
#
# -display vnc=127.0.0.1:0
#
# to the command line in order to prevent QEMU from
# creating a graphical display window on the host and
# enable remote access instead.
[device "video"]
driver = "virtio-gpu"
bus = "pcie.0"
addr = "01.0"

243
docs/mach-virt-serial.cfg Normal file
View File

@@ -0,0 +1,243 @@
# mach-virt - VirtIO guest (serial console)
# =========================================================
#
# Usage:
#
# $ qemu-system-aarch64 \
# -nodefaults \
# -readconfig mach-virt-serial.cfg \
# -display none -serial mon:stdio \
# -cpu host
#
# You will probably need to tweak the lines marked as
# CHANGE ME before being able to use this configuration!
#
# The guest will have a selection of VirtIO devices
# tailored towards optimal performance with modern guests,
# and will be accessed through the serial console.
#
# ---------------------------------------------------------
#
# Using -nodefaults is required to have full control over
# the virtual hardware: when it's specified, QEMU will
# populate the board with only the builtin peripherals,
# such as the PL011 UART, plus a PCI Express Root Bus; the
# user will then have to explicitly add further devices.
#
# The PCI Express Root Bus shows up in the guest as:
#
# 00:00.0 Host bridge
#
# This configuration file adds a number of other useful
# devices, more specifically:
#
# 00.1c.* PCI bridge (PCI Express Root Ports)
# 01:00.0 SCSI storage controller
# 02:00.0 Ethernet controller
#
# More information about these devices is available below.
#
# We use '-display none' to prevent QEMU from creating a
# graphical display window, which would serve no use in
# this specific configuration, and '-serial mon:stdio' to
# multiplex the guest's serial console and the QEMU monitor
# to the host's stdio; use 'Ctrl+A h' to learn how to
# switch between the two and more.
# Machine options
# =========================================================
#
# We use the virt machine type and enable KVM acceleration
# for better performance.
#
# Using less than 1 GiB of memory is probably not going to
# yield good performance in the guest, and might even lead
# to obscure boot issues in some cases.
#
# Unfortunately, there is no way to configure the CPU model
# in this file, so it will have to be provided on the
# command line, but we can configure the guest to use the
# same GIC version as the host.
[machine]
type = "virt"
accel = "kvm"
gic-version = "host"
[memory]
size = "1024"
# Firmware configuration
# =========================================================
#
# There are two parts to the firmware: a read-only image
# containing the executable code, which is shared between
# guests, and a read/write variable store that is owned
# by one specific guest, exclusively, and is used to
# record information such as the UEFI boot order.
#
# For any new guest, its permanent, private variable store
# should initially be copied from the template file
# provided along with the firmware binary.
#
# Depending on the OS distribution you're using on the
# host, the name of the package containing the firmware
# binary and variable store template, as well as the paths
# to the files themselves, will be different. For example:
#
# Fedora
# edk2-aarch64 (pkg)
# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin)
# /usr/share/edk2/aarch64/vars-template-pflash.raw (var)
#
# RHEL
# AAVMF (pkg)
# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
# /usr/share/AAVMF/AAVMF_VARS.fd (var)
#
# Debian/Ubuntu
# qemu-efi (pkg)
# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
# /usr/share/AAVMF/AAVMF_VARS.fd (var)
[drive "uefi-binary"]
file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME
format = "raw"
if = "pflash"
unit = "0"
readonly = "on"
[drive "uefi-varstore"]
file = "guest_VARS.fd" # CHANGE ME
format = "raw"
if = "pflash"
unit = "1"
# PCI bridge (PCI Express Root Ports)
# =========================================================
#
# We create eight PCI Express Root Ports, and we plug them
# all into separate functions of the same slot. Some of
# them will be used by devices, the rest will remain
# available for hotplug.
[device "pcie.1"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.0"
port = "1"
chassis = "1"
multifunction = "on"
[device "pcie.2"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.1"
port = "2"
chassis = "2"
[device "pcie.3"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.2"
port = "3"
chassis = "3"
[device "pcie.4"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.3"
port = "4"
chassis = "4"
[device "pcie.5"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.4"
port = "5"
chassis = "5"
[device "pcie.6"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.5"
port = "6"
chassis = "6"
[device "pcie.7"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.6"
port = "7"
chassis = "7"
[device "pcie.8"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.7"
port = "8"
chassis = "8"
# SCSI storage controller (and storage)
# =========================================================
#
# We use virtio-scsi here so that we can (hot)plug a large
# number of disks without running into issues; a SCSI disk,
# backed by a qcow2 disk image on the host's filesystem, is
# attached to it.
#
# We also create an optical disk, mostly for installation
# purposes: once the guest OS has been succesfully
# installed, the guest will no longer boot from optical
# media. If you don't want, or no longer want, to have an
# optical disk in the guest you can safely comment out
# all relevant sections below.
[device "scsi"]
driver = "virtio-scsi-pci"
bus = "pcie.1"
addr = "00.0"
[device "scsi-disk"]
driver = "scsi-hd"
bus = "scsi.0"
drive = "disk"
bootindex = "1"
[drive "disk"]
file = "guest.qcow2" # CHANGE ME
format = "qcow2"
if = "none"
[device "scsi-optical-disk"]
driver = "scsi-cd"
bus = "scsi.0"
drive = "optical-disk"
bootindex = "2"
[drive "optical-disk"]
file = "install.iso" # CHANGE ME
format = "raw"
if = "none"
# Ethernet controller
# =========================================================
#
# We use virtio-net for improved performance over emulated
# hardware; on the host side, we take advantage of user
# networking so that the QEMU process doesn't require any
# additional privileges.
[netdev "hostnet"]
type = "user"
[device "net"]
driver = "virtio-net-pci"
netdev = "hostnet"
bus = "pcie.2"
addr = "00.0"

View File

@@ -161,6 +161,11 @@ include/hw/hw.h.
=== More about versions ===
Version numbers are intended for major incompatible changes to the
migration of a device, and using them breaks backwards-migration
compatibility; in general most changes can be made by adding Subsections
(see below) or _TEST macros (see below) which won't break compatibility.
You can see that there are several version fields:
- version_id: the maximum version_id supported by VMState for that device.
@@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to
load state from minimum_version_id_old to minimum_version_id. This
function is deprecated and will be removed when no more users are left.
Saving state will always create a section with the 'version_id' value
and thus can't be loaded by any older QEMU.
=== Massaging functions ===
Sometimes, it is not enough to be able to save the state directly
@@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation
not enabled, the values on that fields are garbage and don't need to
be sent.
Using a condition function that checks a 'property' to determine whether
to send a subsection allows backwards migration compatibility when
new subsections are added.
For example;
a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and
default it to true.
b) Add an entry to the HW_COMPAT_ for the previous version
that sets the property to false.
c) Add a static bool support_foo function that tests the property.
d) Add a subsection with a .needed set to the support_foo function
e) (potentially) Add a pre_load that sets up a default value for 'foo'
to be used if the subsection isn't loaded.
Now that subsection will not be generated when using an older
machine type and the migration stream will be accepted by older
QEMU versions. pre-load functions can be used to initialise state
on the newer version so that they default to suitable values
when loading streams created by older QEMU versions that do not
generate the subsection.
In some cases subsections are added for data that had been accidentally
omitted by earlier versions; if the missing data causes the migration
process to succeed but the guest to behave badly then it may be better
to send the subsection and cause the migration to explicitly fail
with the unknown subsection error. If the bad behaviour only happens
with certain data values, making the subsection conditional on
the data value (rather than the machine type) allows migrations to succeed
in most cases. In general the preference is to tie the subsection to
the machine type, and allow reliable migrations, unless the behaviour
from omission of the subsection is really bad.
= Not sending existing elements =
Sometimes members of the VMState are no longer needed;
removing them will break migration compatibility
making them version dependent and bumping the version will break backwards
migration compatibility.
The best way is to:
a) Add a new property/compatibility/function in the same way for subsections
above.
b) replace the VMSTATE macro with the _TEST version of the macro, e.g.:
VMSTATE_UINT32(foo, barstruct)
becomes
VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)
Sometime in the future when we no longer care about the ancient
versions these can be killed off.
= Return path =
In most migration scenarios there is only a single data path that runs
@@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests
such as this can happen as a page is sent at about the same time the
destination accesses it.
=== Postcopy with hugepages ===
Postcopy now works with hugetlbfs backed memory:
a) The linux kernel on the destination must support userfault on hugepages.
b) The huge-page configuration on the source and destination VMs must be
identical; i.e. RAMBlocks on both sides must use the same page size.
c) Note that -mem-path /dev/hugepages will fall back to allocating normal
RAM if it doesn't have enough hugepages, triggering (b) to fail.
Using -mem-prealloc enforces the allocation using hugepages.
d) Care should be taken with the size of hugepage used; postcopy with 2MB
hugepages works well, however 1GB hugepages are likely to be problematic
since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
and until the full page is transferred the destination thread is blocked.

View File

@@ -1,152 +0,0 @@
################################################################
#
# qemu -M q35 creates a bare machine with just the very essential
# chipset devices being present:
#
# 00.0 - Host bridge
# 1f.0 - ISA bridge / LPC
# 1f.2 - SATA (AHCI) controller
# 1f.3 - SMBus controller
#
# This config file documents the other devices and how they are
# created. You can simply use "-readconfig $thisfile" to create
# them all. Here is a overview:
#
# 19.0 - Ethernet controller (not created, our e1000 emulation
# doesn't emulate the ich9 device).
# 1a.* - USB Controller #2 (ehci + uhci companions)
# 1b.0 - HD Audio Controller
# 1c.* - PCI Express Ports
# 1d.* - USB Controller #1 (ehci + uhci companions,
# "qemu -M q35 -usb" creates these too)
# 1e.0 - PCI Bridge
#
[device "ich9-ehci-2"]
driver = "ich9-usb-ehci2"
multifunction = "on"
bus = "pcie.0"
addr = "1a.7"
[device "ich9-uhci-4"]
driver = "ich9-usb-uhci4"
multifunction = "on"
bus = "pcie.0"
addr = "1a.0"
masterbus = "ich9-ehci-2.0"
firstport = "0"
[device "ich9-uhci-5"]
driver = "ich9-usb-uhci5"
multifunction = "on"
bus = "pcie.0"
addr = "1a.1"
masterbus = "ich9-ehci-2.0"
firstport = "2"
[device "ich9-uhci-6"]
driver = "ich9-usb-uhci6"
multifunction = "on"
bus = "pcie.0"
addr = "1a.2"
masterbus = "ich9-ehci-2.0"
firstport = "4"
[device "ich9-hda-audio"]
driver = "ich9-intel-hda"
bus = "pcie.0"
addr = "1b.0"
[device "ich9-pcie-port-1"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.0"
port = "1"
chassis = "1"
[device "ich9-pcie-port-2"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.1"
port = "2"
chassis = "2"
[device "ich9-pcie-port-3"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.2"
port = "3"
chassis = "3"
[device "ich9-pcie-port-4"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.3"
port = "4"
chassis = "4"
##
# Example PCIe switch with two downstream ports
#
#[device "pcie-switch-upstream-port-1"]
# driver = "x3130-upstream"
# bus = "ich9-pcie-port-4"
# addr = "00.0"
#
#[device "pcie-switch-downstream-port-1-1"]
# driver = "xio3130-downstream"
# multifunction = "on"
# bus = "pcie-switch-upstream-port-1"
# addr = "00.0"
# port = "1"
# chassis = "5"
#
#[device "pcie-switch-downstream-port-1-2"]
# driver = "xio3130-downstream"
# multifunction = "on"
# bus = "pcie-switch-upstream-port-1"
# addr = "00.1"
# port = "1"
# chassis = "6"
[device "ich9-ehci-1"]
driver = "ich9-usb-ehci1"
multifunction = "on"
bus = "pcie.0"
addr = "1d.7"
[device "ich9-uhci-1"]
driver = "ich9-usb-uhci1"
multifunction = "on"
bus = "pcie.0"
addr = "1d.0"
masterbus = "ich9-ehci-1.0"
firstport = "0"
[device "ich9-uhci-2"]
driver = "ich9-usb-uhci2"
multifunction = "on"
bus = "pcie.0"
addr = "1d.1"
masterbus = "ich9-ehci-1.0"
firstport = "2"
[device "ich9-uhci-3"]
driver = "ich9-usb-uhci3"
multifunction = "on"
bus = "pcie.0"
addr = "1d.2"
masterbus = "ich9-ehci-1.0"
firstport = "4"
[device "ich9-pci-bridge"]
driver = "i82801b11-bridge"
bus = "pcie.0"
addr = "1e.0"

288
docs/q35-emulated.cfg Normal file
View File

@@ -0,0 +1,288 @@
# q35 - Emulated guest (graphical console)
# =========================================================
#
# Usage:
#
# $ qemu-system-x86_64 \
# -nodefaults \
# -readconfig q35-emulated.cfg
#
# You will probably need to tweak the lines marked as
# CHANGE ME before being able to use this configuration!
#
# The guest will have a selection of emulated devices that
# closely resembles that of a physical machine, and will be
# accessed through a graphical console.
#
# ---------------------------------------------------------
#
# Using -nodefaults is required to have full control over
# the virtual hardware: when it's specified, QEMU will
# populate the board with only the builtin peripherals
# plus a small selection of core PCI devices and
# controllers; the user will then have to explicitly add
# further devices.
#
# The core PCI devices show up in the guest as:
#
# 00:00.0 Host bridge
# 00:1f.0 ISA bridge / LPC
# 00:1f.2 SATA (AHCI) controller
# 00:1f.3 SMBus controller
#
# This configuration file adds a number of devices that
# are pretty much guaranteed to be present in every single
# physical machine based on q35, more specifically:
#
# 00:01.0 VGA compatible controller
# 00:19.0 Ethernet controller
# 00:1a.* USB controller (#2)
# 00:1b.0 Audio device
# 00:1c.* PCI bridge (PCI Express Root Ports)
# 00:1d.* USB Controller (#1)
# 00:1e.0 PCI bridge (legacy PCI bridge)
#
# More information about these devices is available below.
# Machine options
# =========================================================
#
# We use the q35 machine type and enable KVM acceleration
# for better performance.
#
# Using less than 1 GiB of memory is probably not going to
# yield good performance in the guest, and might even lead
# to obscure boot issues in some cases.
#
# Unfortunately, there is no way to configure the CPU model
# in this file, so it will have to be provided on the
# command line.
[machine]
type = "q35"
accel = "kvm"
[memory]
size = "1024"
# PCI bridge (PCI Express Root Ports)
# =========================================================
#
# We add four PCI Express Root Ports, all sharing the same
# slot on the PCI Express Root Bus. These ports support
# hotplug.
[device "ich9-pcie-port-1"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.0"
port = "1"
chassis = "1"
[device "ich9-pcie-port-2"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.1"
port = "2"
chassis = "2"
[device "ich9-pcie-port-3"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.2"
port = "3"
chassis = "3"
[device "ich9-pcie-port-4"]
driver = "ioh3420"
multifunction = "on"
bus = "pcie.0"
addr = "1c.3"
port = "4"
chassis = "4"
# PCI bridge (legacy PCI bridge)
# =========================================================
#
# This bridge can be used to build an independent topology
# for legacy PCI devices. PCI Express devices should be
# plugged into PCI Express slots instead, so ideally there
# will be no devices connected to this bridge.
[device "ich9-pci-bridge"]
driver = "i82801b11-bridge"
bus = "pcie.0"
addr = "1e.0"
# SATA storage
# =========================================================
#
# An implicit SATA controller is created automatically for
# every single q35 guest; here we create a disk, backed by
# a qcow2 disk image on the host's filesystem, and attach
# it to that controller so that the guest can use it.
#
# We also create an optical disk, mostly for installation
# purposes: once the guest OS has been succesfully
# installed, the guest will no longer boot from optical
# media. If you don't want, or no longer want, to have an
# optical disk in the guest you can safely comment out
# all relevant sections below.
[device "sata-disk"]
driver = "ide-hd"
bus = "ide.0"
drive = "disk"
bootindex = "1"
[drive "disk"]
file = "guest.qcow2" # CHANGE ME
format = "qcow2"
if = "none"
[device "sata-optical-disk"]
driver = "ide-cd"
bus = "ide.1"
drive = "optical-disk"
bootindex = "2"
[drive "optical-disk"]
file = "install.iso" # CHANGE ME
format = "raw"
if = "none"
# USB controller (#1)
# =========================================================
#
# EHCI controller + UHCI companion controllers.
[device "ich9-ehci-1"]
driver = "ich9-usb-ehci1"
multifunction = "on"
bus = "pcie.0"
addr = "1d.7"
[device "ich9-uhci-1"]
driver = "ich9-usb-uhci1"
multifunction = "on"
bus = "pcie.0"
addr = "1d.0"
masterbus = "ich9-ehci-1.0"
firstport = "0"
[device "ich9-uhci-2"]
driver = "ich9-usb-uhci2"
multifunction = "on"
bus = "pcie.0"
addr = "1d.1"
masterbus = "ich9-ehci-1.0"
firstport = "2"
[device "ich9-uhci-3"]
driver = "ich9-usb-uhci3"
multifunction = "on"
bus = "pcie.0"
addr = "1d.2"
masterbus = "ich9-ehci-1.0"
firstport = "4"
# USB controller (#2)
# =========================================================
#
# EHCI controller + UHCI companion controllers.
[device "ich9-ehci-2"]
driver = "ich9-usb-ehci2"
multifunction = "on"
bus = "pcie.0"
addr = "1a.7"
[device "ich9-uhci-4"]
driver = "ich9-usb-uhci4"
multifunction = "on"
bus = "pcie.0"
addr = "1a.0"
masterbus = "ich9-ehci-2.0"
firstport = "0"
[device "ich9-uhci-5"]
driver = "ich9-usb-uhci5"
multifunction = "on"
bus = "pcie.0"
addr = "1a.1"
masterbus = "ich9-ehci-2.0"
firstport = "2"
[device "ich9-uhci-6"]
driver = "ich9-usb-uhci6"
multifunction = "on"
bus = "pcie.0"
addr = "1a.2"
masterbus = "ich9-ehci-2.0"
firstport = "4"
# Ethernet controller
# =========================================================
#
# We add a Gigabit Ethernet interface to the guest; on the
# host side, we take advantage of user networking so that
# the QEMU process doesn't require any additional
# privileges.
[netdev "hostnet"]
type = "user"
[device "net"]
driver = "e1000"
netdev = "hostnet"
bus = "pcie.0"
addr = "19.0"
# VGA compatible controller
# =========================================================
#
# We use stdvga instead of Cirrus as it supports more video
# modes and is closer to what actual hardware looks like.
#
# If you're running the guest on a remote, potentially
# headless host, you will probably want to append something
# like
#
# -display vnc=127.0.0.1:0
#
# to the command line in order to prevent QEMU from
# creating a graphical display window on the host and
# enable remote access instead.
[device "video"]
driver = "VGA"
bus = "pcie.0"
addr = "01.0"
# Audio device
# =========================================================
#
# The sound card is a legacy PCI device that is plugged
# directly into the PCI Express Root Bus.
[device "ich9-hda-audio"]
driver = "ich9-intel-hda"
bus = "pcie.0"
addr = "1b.0"
[device "ich9-hda-duplex"]
driver = "hda-duplex"
bus = "ich9-hda-audio.0"
cad = "0"

View File

@@ -0,0 +1,248 @@
# q35 - VirtIO guest (graphical console)
# =========================================================
#
# Usage:
#
# $ qemu-system-x86_64 \
# -nodefaults \
# -readconfig q35-virtio-graphical.cfg
#
# You will probably need to tweak the lines marked as
# CHANGE ME before being able to use this configuration!
#
# The guest will have a selection of VirtIO devices
# tailored towards optimal performance with modern guests,
# and will be accessed through a graphical console.
#
# ---------------------------------------------------------
#
# Using -nodefaults is required to have full control over
# the virtual hardware: when it's specified, QEMU will
# populate the board with only the builtin peripherals
# plus a small selection of core PCI devices and
# controllers; the user will then have to explicitly add
# further devices.
#
# The core PCI devices show up in the guest as:
#
# 00:00.0 Host bridge
# 00:1f.0 ISA bridge / LPC
# 00:1f.2 SATA (AHCI) controller
# 00:1f.3 SMBus controller
#
# This configuration file adds a number of other useful
# devices, more specifically:
#
# 00:01.0 VGA compatible controller
# 00:1b.0 Audio device
# 00.1c.* PCI bridge (PCI Express Root Ports)
# 01:00.0 SCSI storage controller
# 02:00.0 Ethernet controller
# 03:00.0 USB controller
#
# More information about these devices is available below.
# Machine options
# =========================================================
#
# We use the q35 machine type and enable KVM acceleration
# for better performance.
#
# Using less than 1 GiB of memory is probably not going to
# yield good performance in the guest, and might even lead
# to obscure boot issues in some cases.
[machine]
type = "q35"
accel = "kvm"
[memory]
size = "1024"
# PCI bridge (PCI Express Root Ports)
# =========================================================
#
# We create eight PCI Express Root Ports, and we plug them
# all into separate functions of the same slot. Some of
# them will be used by devices, the rest will remain
# available for hotplug.
[device "pcie.1"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.0"
port = "1"
chassis = "1"
multifunction = "on"
[device "pcie.2"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.1"
port = "2"
chassis = "2"
[device "pcie.3"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.2"
port = "3"
chassis = "3"
[device "pcie.4"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.3"
port = "4"
chassis = "4"
[device "pcie.5"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.4"
port = "5"
chassis = "5"
[device "pcie.6"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.5"
port = "6"
chassis = "6"
[device "pcie.7"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.6"
port = "7"
chassis = "7"
[device "pcie.8"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.7"
port = "8"
chassis = "8"
# SCSI storage controller (and storage)
# =========================================================
#
# We use virtio-scsi here so that we can (hot)plug a large
# number of disks without running into issues; a SCSI disk,
# backed by a qcow2 disk image on the host's filesystem, is
# attached to it.
#
# We also create an optical disk, mostly for installation
# purposes: once the guest OS has been succesfully
# installed, the guest will no longer boot from optical
# media. If you don't want, or no longer want, to have an
# optical disk in the guest you can safely comment out
# all relevant sections below.
[device "scsi"]
driver = "virtio-scsi-pci"
bus = "pcie.1"
addr = "00.0"
[device "scsi-disk"]
driver = "scsi-hd"
bus = "scsi.0"
drive = "disk"
bootindex = "1"
[drive "disk"]
file = "guest.qcow2" # CHANGE ME
format = "qcow2"
if = "none"
[device "scsi-optical-disk"]
driver = "scsi-cd"
bus = "scsi.0"
drive = "optical-disk"
bootindex = "2"
[drive "optical-disk"]
file = "install.iso" # CHANGE ME
format = "raw"
if = "none"
# Ethernet controller
# =========================================================
#
# We use virtio-net for improved performance over emulated
# hardware; on the host side, we take advantage of user
# networking so that the QEMU process doesn't require any
# additional privileges.
[netdev "hostnet"]
type = "user"
[device "net"]
driver = "virtio-net-pci"
netdev = "hostnet"
bus = "pcie.2"
addr = "00.0"
# USB controller (and input devices)
# =========================================================
#
# We add a virtualization-friendly USB 3.0 controller and
# a USB tablet so that graphical guests can be controlled
# appropriately. A USB keyboard is not needed, as q35
# guests get a PS/2 one added automatically.
[device "usb"]
driver = "nec-usb-xhci"
bus = "pcie.3"
addr = "00.0"
[device "tablet"]
driver = "usb-tablet"
bus = "usb.0"
# VGA compatible controller
# =========================================================
#
# We plug the QXL video card directly into the PCI Express
# Root Bus as it is a legacy PCI device; this way, we can
# reduce the number of PCI Express controllers in the
# guest.
#
# If you're running the guest on a remote, potentially
# headless host, you will probably want to append something
# like
#
# -display vnc=127.0.0.1:0
#
# to the command line in order to prevent QEMU from
# creating a graphical display window on the host and
# enable remote access instead.
[device "video"]
driver = "qxl-vga"
bus = "pcie.0"
addr = "01.0"
# Audio device
# =========================================================
#
# Like the video card, the sound card is a legacy PCI
# device and as such can be plugged directly into the PCI
# Express Root Bus.
[device "sound"]
driver = "ich9-intel-hda"
bus = "pcie.0"
addr = "1b.0"
[device "duplex"]
driver = "hda-duplex"
bus = "sound.0"
cad = "0"

193
docs/q35-virtio-serial.cfg Normal file
View File

@@ -0,0 +1,193 @@
# q35 - VirtIO guest (serial console)
# =========================================================
#
# Usage:
#
# $ qemu-system-x86_64 \
# -nodefaults \
# -readconfig q35-virtio-serial.cfg \
# -display none -serial mon:stdio
#
# You will probably need to tweak the lines marked as
# CHANGE ME before being able to use this configuration!
#
# The guest will have a selection of VirtIO devices
# tailored towards optimal performance with modern guests,
# and will be accessed through the serial console.
#
# ---------------------------------------------------------
#
# Using -nodefaults is required to have full control over
# the virtual hardware: when it's specified, QEMU will
# populate the board with only the builtin peripherals
# plus a small selection of core PCI devices and
# controllers; the user will then have to explicitly add
# further devices.
#
# The core PCI devices show up in the guest as:
#
# 00:00.0 Host bridge
# 00:1f.0 ISA bridge / LPC
# 00:1f.2 SATA (AHCI) controller
# 00:1f.3 SMBus controller
#
# This configuration file adds a number of other useful
# devices, more specifically:
#
# 00.1c.* PCI bridge (PCI Express Root Ports)
# 01:00.0 SCSI storage controller
# 02:00.0 Ethernet controller
#
# More information about these devices is available below.
#
# We use '-display none' to prevent QEMU from creating a
# graphical display window, which would serve no use in
# this specific configuration, and '-serial mon:stdio' to
# multiplex the guest's serial console and the QEMU monitor
# to the host's stdio; use 'Ctrl+A h' to learn how to
# switch between the two and more.
# Machine options
# =========================================================
#
# We use the q35 machine type and enable KVM acceleration
# for better performance.
#
# Using less than 1 GiB of memory is probably not going to
# yield good performance in the guest, and might even lead
# to obscure boot issues in some cases.
[machine]
type = "q35"
accel = "kvm"
[memory]
size = "1024"
# PCI bridge (PCI Express Root Ports)
# =========================================================
#
# We create eight PCI Express Root Ports, and we plug them
# all into separate functions of the same slot. Some of
# them will be used by devices, the rest will remain
# available for hotplug.
[device "pcie.1"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.0"
port = "1"
chassis = "1"
multifunction = "on"
[device "pcie.2"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.1"
port = "2"
chassis = "2"
[device "pcie.3"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.2"
port = "3"
chassis = "3"
[device "pcie.4"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.3"
port = "4"
chassis = "4"
[device "pcie.5"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.4"
port = "5"
chassis = "5"
[device "pcie.6"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.5"
port = "6"
chassis = "6"
[device "pcie.7"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.6"
port = "7"
chassis = "7"
[device "pcie.8"]
driver = "pcie-root-port"
bus = "pcie.0"
addr = "1c.7"
port = "8"
chassis = "8"
# SCSI storage controller (and storage)
# =========================================================
#
# We use virtio-scsi here so that we can (hot)plug a large
# number of disks without running into issues; a SCSI disk,
# backed by a qcow2 disk image on the host's filesystem, is
# attached to it.
#
# We also create an optical disk, mostly for installation
# purposes: once the guest OS has been succesfully
# installed, the guest will no longer boot from optical
# media. If you don't want, or no longer want, to have an
# optical disk in the guest you can safely comment out
# all relevant sections below.
[device "scsi"]
driver = "virtio-scsi-pci"
bus = "pcie.1"
addr = "00.0"
[device "scsi-disk"]
driver = "scsi-hd"
bus = "scsi.0"
drive = "disk"
bootindex = "1"
[drive "disk"]
file = "guest.qcow2" # CHANGE ME
format = "qcow2"
if = "none"
[device "scsi-optical-disk"]
driver = "scsi-cd"
bus = "scsi.0"
drive = "optical-disk"
bootindex = "2"
[drive "optical-disk"]
file = "install.iso" # CHANGE ME
format = "raw"
if = "none"
# Ethernet controller
# =========================================================
#
# We use virtio-net for improved performance over emulated
# hardware; on the host side, we take advantage of user
# networking so that the QEMU process doesn't require any
# additional privileges.
[netdev "hostnet"]
type = "user"
[device "net"]
driver = "virtio-net-pci"
netdev = "hostnet"
bus = "pcie.2"
addr = "00.0"

View File

@@ -117,10 +117,13 @@ Example:
==== Expression documentation ====
Each expression that isn't an include directive must be preceded by a
Each expression that isn't an include directive may be preceded by a
documentation block. Such blocks are called expression documentation
blocks.
When documentation is required (see pragma 'doc-required'), expression
documentation blocks are mandatory.
The documentation block consists of a first line naming the
expression, an optional overview, a description of each argument (for
commands and events) or member (for structs, unions and alternates),
@@ -128,10 +131,8 @@ and optional tagged sections.
FIXME: the parser accepts these things in almost any order.
Optional arguments / members are tagged with the phrase '#optional',
often with their default value; and extensions added after the
expression was first released are also given a '(since x.y.z)'
comment.
Extensions added after the expression was first released carry a
'(since x.y.z)' comment.
A tagged section starts with one of the following words:
"Note:"/"Notes:", "Since:", "Example"/"Examples", "Returns:", "TODO:".
@@ -147,10 +148,10 @@ For example:
#
# Statistics of a virtual block device or a block backing device.
#
# @device: #optional If the stats are for a virtual block device, the name
# @device: If the stats are for a virtual block device, the name
# corresponding to the virtual block device.
#
# @node-name: #optional The node name of the device. (since 2.3)
# @node-name: The node name of the device. (since 2.3)
#
# ... more members ...
#
@@ -165,7 +166,7 @@ For example:
#
# Query the @BlockStats for all virtual block devices.
#
# @query-nodes: #optional If true, the command will query all the
# @query-nodes: If true, the command will query all the
# block nodes ... explain, explain ... (since 2.3)
#
# Returns: A list of @BlockStats for each virtual block devices.
@@ -204,45 +205,53 @@ once. It is permissible for the schema to contain additional types
not used by any commands or events in the Client JSON Protocol, for
the side effect of generated C code used internally.
There are seven top-level expressions recognized by the parser:
'include', 'command', 'struct', 'enum', 'union', 'alternate', and
'event'. There are several groups of types: simple types (a number of
built-in types, such as 'int' and 'str'; as well as enumerations),
complex types (structs and two flavors of unions), and alternate types
(a choice between other types). The 'command' and 'event' expressions
can refer to existing types by name, or list an anonymous type as a
dictionary. Listing a type name inside an array refers to a
single-dimension array of that type; multi-dimension arrays are not
directly supported (although an array of a complex struct that
contains an array member is possible).
There are eight top-level expressions recognized by the parser:
'include', 'pragma', 'command', 'struct', 'enum', 'union',
'alternate', and 'event'. There are several groups of types: simple
types (a number of built-in types, such as 'int' and 'str'; as well as
enumerations), complex types (structs and two flavors of unions), and
alternate types (a choice between other types). The 'command' and
'event' expressions can refer to existing types by name, or list an
anonymous type as a dictionary. Listing a type name inside an array
refers to a single-dimension array of that type; multi-dimension
arrays are not directly supported (although an array of a complex
struct that contains an array member is possible).
All names must begin with a letter, and contain only ASCII letters,
digits, hyphen, and underscore. There are two exceptions: enum values
may start with a digit, and names that are downstream extensions (see
section Downstream extensions) start with underscore.
Names beginning with 'q_' are reserved for the generator, which uses
them for munging QMP names that resemble C keywords or other
problematic strings. For example, a member named "default" in qapi
becomes "q_default" in the generated C code.
Types, commands, and events share a common namespace. Therefore,
generally speaking, type definitions should always use CamelCase for
user-defined type names, while built-in types are lowercase. Type
definitions should not end in 'Kind', as this namespace is used for
creating implicit C enums for visiting union types, or in 'List', as
this namespace is used for creating array types. Command names,
and member names within a type, should be all lower case with words
separated by a hyphen. However, some existing older commands and
complex types use underscore; when extending such expressions,
consistency is preferred over blindly avoiding underscore. Event
names should be ALL_CAPS with words separated by underscore. Member
names cannot start with 'has-' or 'has_', as this is reserved for
tracking optional members.
user-defined type names, while built-in types are lowercase.
Type names ending with 'Kind' or 'List' are reserved for the
generator, which uses them for implicit union enums and array types,
respectively.
Command names, and member names within a type, should be all lower
case with words separated by a hyphen. However, some existing older
commands and complex types use underscore; when extending such
expressions, consistency is preferred over blindly avoiding
underscore.
Event names should be ALL_CAPS with words separated by underscore.
Member names starting with 'has-' or 'has_' are reserved for the
generator, which uses them for tracking optional members.
Any name (command, event, type, member, or enum value) beginning with
"x-" is marked experimental, and may be withdrawn or changed
incompatibly in a future release. All names must begin with a letter,
and contain only ASCII letters, digits, dash, and underscore. There
are two exceptions: enum values may start with a digit, and any
extensions added by downstream vendors should start with a prefix
matching "__RFQDN_" (for the reverse-fully-qualified-domain-name of
the vendor), even if the rest of the name uses dash (example:
__com.redhat_drive-mirror). Names beginning with 'q_' are reserved
for the generator: QMP names that resemble C keywords or other
problematic strings will be munged in C to use this prefix. For
example, a member named "default" in qapi becomes "q_default" in the
generated C code.
incompatibly in a future release.
Pragma 'name-case-whitelist' lets you violate the rules on use of
upper and lower case. Use for new code is strongly discouraged.
In the rest of this document, usage lines are given for each
expression type, with literal strings written in lower case and
@@ -277,7 +286,7 @@ The following types are predefined, and map to C as follows:
QType QType JSON string matching enum QType values
=== Includes ===
=== Include directives ===
Usage: { 'include': STRING }
@@ -297,6 +306,26 @@ an outer file. The parser may be made stricter in the future to
prevent incomplete include files.
=== Pragma directives ===
Usage: { 'pragma': DICT }
The pragma directive lets you control optional generator behavior.
The dictionary's entries are pragma names and values.
Pragma's scope is currently the complete schema. Setting the same
pragma to different values in parts of the schema doesn't work.
Pragma 'doc-required' takes a boolean value. If true, documentation
is required. Default is false.
Pragma 'returns-whitelist' takes a list of command names that may
violate the rules on permitted return types. Default is none.
Pragma 'name-case-whitelist' takes a list of names that may violate
rules on use of upper- vs. lower-case letters. Default is none.
=== Struct types ===
Usage: { 'struct': STRING, 'data': DICT, '*base': STRUCT-NAME }
@@ -536,22 +565,18 @@ The 'data' argument maps to the "arguments" dictionary passed in as
part of a Client JSON Protocol command. The 'data' member is optional
and defaults to {} (an empty dictionary). If present, it must be the
string name of a complex type, or a dictionary that declares an
anonymous type with the same semantics as a 'struct' expression, with
one exception noted below when 'gen' is used.
anonymous type with the same semantics as a 'struct' expression.
The 'returns' member describes what will appear in the "return" member
of a Client JSON Protocol reply on successful completion of a command.
The member is optional from the command declaration; if absent, the
"return" member will be an empty dictionary. If 'returns' is present,
it must be the string name of a complex or built-in type, a
one-element array containing the name of a complex or built-in type,
with one exception noted below when 'gen' is used. Although it is
permitted to have the 'returns' member name a built-in type or an
array of built-in types, any command that does this cannot be extended
to return additional information in the future; thus, new commands
should strongly consider returning a dictionary-based type or an array
of dictionaries, even if the dictionary only contains one member at the
present.
one-element array containing the name of a complex or built-in type.
To return anything else, you have to list the command in pragma
'returns-whitelist'. If you do this, the command cannot be extended
to return additional information in the future. Use of
'returns-whitelist' for new commands is strongly discouraged.
All commands in Client JSON Protocol use a dictionary to report
failure, with no way to specify that in QAPI. Where the error return
@@ -643,6 +668,18 @@ any non-empty complex type (struct, union, or alternate), and a
pointer to that QAPI type is passed as a single argument.
=== Downstream extensions ===
QAPI schema names that are externally visible, say in the Client JSON
Protocol, need to be managed with care. Names starting with a
downstream prefix of the form __RFQDN_ are reserved for the downstream
who controls the valid, reverse fully qualified domain name RFQDN.
RFQDN may only contain ASCII letters, digits, hyphen and period.
Example: Red Hat, Inc. controls redhat.com, and may therefore add a
downstream command __com.redhat_drive-mirror.
== Client JSON Protocol introspection ==
Clients of a Client JSON Protocol commonly need to figure out what
@@ -1138,7 +1175,7 @@ Example:
Visitor *v;
UserDefOneList *arg1 = NULL;
v = qobject_input_visitor_new(QOBJECT(args), true);
v = qobject_input_visitor_new(QOBJECT(args));
visit_start_struct(v, NULL, NULL, 0, &err);
if (err) {
goto out;

View File

@@ -65,7 +65,7 @@ along with this manual. If not, see http://www.gnu.org/licenses/.
@c for texi2pod:
@c man begin DESCRIPTION
@include qemu-qapi.texi
@include qemu-qmp-qapi.texi
@c man end

View File

@@ -225,3 +225,10 @@ recording the virtual machine this filter puts all packets coming from
the outer world into the log. In replay mode packets from the log are
injected into the network device. All interactions with network backend
in replay mode are disabled.
Audio devices
-------------
Audio data is recorded and replay automatically. The command line for recording
and replaying must contain identical specifications of audio hardware, e.g.:
-soundhw ac97

245
docs/specs/vmgenid.txt Normal file
View File

@@ -0,0 +1,245 @@
VIRTUAL MACHINE GENERATION ID
=============================
Copyright (C) 2016 Red Hat, Inc.
Copyright (C) 2017 Skyport Systems, Inc.
This work is licensed under the terms of the GNU GPL, version 2 or later.
See the COPYING file in the top-level directory.
===
The VM generation ID (vmgenid) device is an emulated device which
exposes a 128-bit, cryptographically random, integer value identifier,
referred to as a Globally Unique Identifier, or GUID.
This allows management applications (e.g. libvirt) to notify the guest
operating system when the virtual machine is executed with a different
configuration (e.g. snapshot execution or creation from a template). The
guest operating system notices the change, and is then able to react as
appropriate by marking its copies of distributed databases as dirty,
re-initializing its random number generator etc.
Requirements
------------
These requirements are extracted from the "How to implement virtual machine
generation ID support in a virtualization platform" section of the
specification, dated August 1, 2012.
The document may be found on the web at:
http://go.microsoft.com/fwlink/?LinkId=260709
R1a. The generation ID shall live in an 8-byte aligned buffer.
R1b. The buffer holding the generation ID shall be in guest RAM, ROM, or device
MMIO range.
R1c. The buffer holding the generation ID shall be kept separate from areas
used by the operating system.
R1d. The buffer shall not be covered by an AddressRangeMemory or
AddressRangeACPI entry in the E820 or UEFI memory map.
R1e. The generation ID shall not live in a page frame that could be mapped with
caching disabled. (In other words, regardless of whether the generation ID
lives in RAM, ROM or MMIO, it shall only be mapped as cacheable.)
R2 to R5. [These AML requirements are isolated well enough in the Microsoft
specification for us to simply refer to them here.]
R6. The hypervisor shall expose a _HID (hardware identifier) object in the
VMGenId device's scope that is unique to the hypervisor vendor.
QEMU Implementation
-------------------
The above-mentioned specification does not dictate which ACPI descriptor table
will contain the VM Generation ID device. Other implementations (Hyper-V and
Xen) put it in the main descriptor table (Differentiated System Description
Table or DSDT). For ease of debugging and implementation, we have decided to
put it in its own Secondary System Description Table, or SSDT.
The following is a dump of the contents from a running system:
# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT
Intel ACPI Component Architecture
ASL+ Optimizing Compiler version 20150717-64
Copyright (c) 2000 - 2015 Intel Corporation
Reading ACPI table from file /sys/firmware/acpi/tables/SSDT - Length
00000198 (0x0000C6)
ACPI: SSDT 0x0000000000000000 0000C6 (v01 BOCHS VMGENID 00000001 BXPC
00000001)
Acpi table [SSDT] successfully installed and loaded
Pass 1 parse of [SSDT]
Pass 2 parse of [SSDT]
Parsing Deferred Opcodes (Methods/Buffers/Packages/Regions)
Parsing completed
Disassembly completed
ASL Output: ./SSDT.dsl - 1631 bytes
# cat SSDT.dsl
/*
* Intel ACPI Component Architecture
* AML/ASL+ Disassembler version 20150717-64
* Copyright (c) 2000 - 2015 Intel Corporation
*
* Disassembling to symbolic ASL+ operators
*
* Disassembly of /sys/firmware/acpi/tables/SSDT, Sun Feb 5 00:19:37 2017
*
* Original Table Header:
* Signature "SSDT"
* Length 0x000000CA (202)
* Revision 0x01
* Checksum 0x4B
* OEM ID "BOCHS "
* OEM Table ID "VMGENID"
* OEM Revision 0x00000001 (1)
* Compiler ID "BXPC"
* Compiler Version 0x00000001 (1)
*/
DefinitionBlock ("/sys/firmware/acpi/tables/SSDT.aml", "SSDT", 1, "BOCHS ",
"VMGENID", 0x00000001)
{
Name (VGIA, 0x07FFF000)
Scope (\_SB)
{
Device (VGEN)
{
Name (_HID, "QEMUVGID") // _HID: Hardware ID
Name (_CID, "VM_Gen_Counter") // _CID: Compatible ID
Name (_DDN, "VM_Gen_Counter") // _DDN: DOS Device Name
Method (_STA, 0, NotSerialized) // _STA: Status
{
Local0 = 0x0F
If ((VGIA == Zero))
{
Local0 = Zero
}
Return (Local0)
}
Method (ADDR, 0, NotSerialized)
{
Local0 = Package (0x02) {}
Index (Local0, Zero) = (VGIA + 0x28)
Index (Local0, One) = Zero
Return (Local0)
}
}
}
Method (\_GPE._E05, 0, NotSerialized) // _Exx: Edge-Triggered GPE
{
Notify (\_SB.VGEN, 0x80) // Status Change
}
}
Design Details:
---------------
Requirements R1a through R1e dictate that the memory holding the
VM Generation ID must be allocated and owned by the guest firmware,
in this case BIOS or UEFI. However, to be useful, QEMU must be able to
change the contents of the memory at runtime, specifically when starting a
backed-up or snapshotted image. In order to do this, QEMU must know the
address that has been allocated.
The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
These are data object that are visible to both QEMU and guests, and are
addressable as sequential files.
More information about fw_cfg can be found in "docs/specs/fw_cfg.txt"
Two fw_cfg blobs are used in this case:
/etc/vmgenid_guid - contains the actual VM Generation ID GUID
- read-only to the guest
/etc/vmgenid_addr - contains the address of the downloaded vmgenid blob
- writeable by the guest
QEMU sends the following commands to the guest at startup:
1. Allocate memory for vmgenid_guid fw_cfg blob.
2. Write the address of vmgenid_guid into the SSDT (VGIA ACPI variable as
shown above in the iasl dump). Note that this change is not propagated
back to QEMU.
3. Write the address of vmgenid_guid back to QEMU's copy of vmgenid_addr
via the fw_cfg DMA interface.
After step 3, QEMU is able to update the contents of vmgenid_guid at will.
Since BIOS or UEFI does not necessarily run when we wish to change the GUID,
the value of VGIA is persisted via the VMState mechanism.
As spelled out in the specification, any change to the GUID executes an
ACPI notification. The exact handler to use is not specified, so the vmgenid
device uses the first unused one: \_GPE._E05.
Endian-ness Considerations:
---------------------------
Although not specified in Microsoft's document, it is assumed that the
device is expected to use little-endian format.
All GUID passed in via command line or monitor are treated as big-endian.
GUID values displayed via monitor are shown in big-endian format.
GUID Storage Format:
--------------------
In order to implement an OVMF "SDT Header Probe Suppressor", the contents of
the vmgenid_guid fw_cfg blob are not simply a 128-bit GUID. There is also
significant padding in order to align and fill a memory page, as shown in the
following diagram:
+----------------------------------+
| SSDT with OEM Table ID = VMGENID |
+----------------------------------+
| ... | TOP OF PAGE
| VGIA dword object ---------------|-----> +---------------------------+
| ... | | fw-allocated array for |
| _STA method referring to VGIA | | "etc/vmgenid_guid" |
| ... | +---------------------------+
| ADDR method referring to VGIA | | 0: OVMF SDT Header probe |
| ... | | suppressor |
+----------------------------------+ | 36: padding for 8-byte |
| alignment |
| 40: GUID |
| 56: padding to page size |
+---------------------------+
END OF PAGE
Device Usage:
-------------
The device has one property, which may be only be set using the command line:
guid - sets the value of the GUID. A special value "auto" instructs
QEMU to generate a new random GUID.
For example:
QEMU -device vmgenid,guid="324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"
QEMU -device vmgenid,guid=auto
The property may be queried via QMP/HMP:
(QEMU) query-vm-generation-id
{"return": {"guid": "324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"}}
Setting of this parameter is intentionally left out from the QMP/HMP
interfaces. There are no known use cases for changing the GUID once QEMU is
running, and adding this capability would greatly increase the complexity.

View File

@@ -252,7 +252,7 @@ here goes "hello-world"'s new entry for the qapi-schema.json file:
#
# Print a client provided string to the standard output stream.
#
# @message: #optional string to be printed
# @message: string to be printed
#
# Returns: Nothing on success.
#
@@ -358,7 +358,7 @@ The best way to return that data is to create a new QAPI type, as shown below:
#
# @clock-name: The alarm clock method's name.
#
# @next-deadline: #optional The time (in nanoseconds) the next alarm will fire.
# @next-deadline: The time (in nanoseconds) the next alarm will fire.
#
# Since: 1.0
##

2
dtc

Submodule dtc updated: ec02b34c05...558cd81bdd

174
exec.c
View File

@@ -42,9 +42,17 @@
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
#include "sysemu/numa.h"
#include "sysemu/hw_accel.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <fcntl.h>
#include <linux/falloc.h>
#endif
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
@@ -1250,6 +1258,87 @@ void qemu_mutex_unlock_ramlist(void)
qemu_mutex_unlock(&ram_list.mutex);
}
#ifdef __linux__
/*
* FIXME TOCTTOU: this iterates over memory backends' mem-path, which
* may or may not name the same files / on the same filesystem now as
* when we actually open and map them. Iterate over the file
* descriptors instead, and use qemu_fd_getpagesize().
*/
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
char *mem_path;
long *hpsize_min = opaque;
if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
mem_path = object_property_get_str(obj, "mem-path", NULL);
if (mem_path) {
long hpsize = qemu_mempath_getpagesize(mem_path);
if (hpsize < *hpsize_min) {
*hpsize_min = hpsize;
}
} else {
*hpsize_min = getpagesize();
}
}
return 0;
}
long qemu_getrampagesize(void)
{
long hpsize = LONG_MAX;
long mainrampagesize;
Object *memdev_root;
if (mem_path) {
mainrampagesize = qemu_mempath_getpagesize(mem_path);
} else {
mainrampagesize = getpagesize();
}
/* it's possible we have memory-backend objects with
* hugepage-backed RAM. these may get mapped into system
* address space via -numa parameters or memory hotplug
* hooks. we want to take these into account, but we
* also want to make sure these supported hugepage
* sizes are applicable across the entire range of memory
* we may boot from, so we take the min across all
* backends, and assume normal pages in cases where a
* backend isn't backed by hugepages.
*/
memdev_root = object_resolve_path("/objects", NULL);
if (memdev_root) {
object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
}
if (hpsize == LONG_MAX) {
/* No additional memory regions found ==> Report main RAM page size */
return mainrampagesize;
}
/* If NUMA is disabled or the NUMA nodes are not backed with a
* memory-backend, then there is at least one node using "normal" RAM,
* so if its page size is smaller we have got to report that size instead.
*/
if (hpsize > mainrampagesize &&
(nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
static bool warned;
if (!warned) {
error_report("Huge page support disabled (n/a for main memory).");
warned = true;
}
return mainrampagesize;
}
return hpsize;
}
#else
long qemu_getrampagesize(void)
{
return getpagesize();
}
#endif
#ifdef __linux__
static int64_t get_file_size(int fd)
{
@@ -1379,7 +1468,7 @@ static void *file_ram_alloc(RAMBlock *block,
}
if (mem_prealloc) {
os_mem_prealloc(fd, area, memory, errp);
os_mem_prealloc(fd, area, memory, smp_cpus, errp);
if (errp && *errp) {
goto error;
}
@@ -1472,6 +1561,11 @@ const char *qemu_ram_get_idstr(RAMBlock *rb)
return rb->idstr;
}
bool qemu_ram_is_shared(RAMBlock *rb)
{
return rb->flags & RAM_SHARED;
}
/* Called with iothread lock held. */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
@@ -1518,6 +1612,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb)
return rb->page_size;
}
/* Returns the largest size of page in use */
size_t qemu_ram_pagesize_largest(void)
{
RAMBlock *block;
size_t largest = 0;
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
largest = MAX(largest, qemu_ram_pagesize(block));
}
return largest;
}
static int memory_try_enable_merging(void *addr, size_t len)
{
if (!machine_mem_merge(current_machine)) {
@@ -3208,6 +3315,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
hwaddr phys_addr;
target_ulong page;
cpu_synchronize_state(cpu);
while (len > 0) {
int asidx;
MemTxAttrs attrs;
@@ -3294,4 +3402,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
rcu_read_unlock();
return ret;
}
/*
* Unmap pages of memory from start to start+length such that
* they a) read as 0, b) Trigger whatever fault mechanism
* the OS provides for postcopy.
* The pages must be unmapped by the end of the function.
* Returns: 0 on success, none-0 on failure
*
*/
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
{
int ret = -1;
uint8_t *host_startaddr = rb->host + start;
if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
error_report("ram_block_discard_range: Unaligned start address: %p",
host_startaddr);
goto err;
}
if ((start + length) <= rb->used_length) {
uint8_t *host_endaddr = host_startaddr + length;
if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
error_report("ram_block_discard_range: Unaligned end address: %p",
host_endaddr);
goto err;
}
errno = ENOTSUP; /* If we are missing MADVISE etc */
if (rb->page_size == qemu_host_page_size) {
#if defined(CONFIG_MADVISE)
/* Note: We need the madvise MADV_DONTNEED behaviour of definitely
* freeing the page.
*/
ret = madvise(host_startaddr, length, MADV_DONTNEED);
#endif
} else {
/* Huge page case - unfortunately it can't do DONTNEED, but
* it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
* huge page file.
*/
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
#endif
}
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to discard range "
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
}
} else {
error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
"/%zx/" RAM_ADDR_FMT")",
rb->idstr, start, length, rb->used_length);
}
err:
return ret;
}
#endif

View File

@@ -7492,7 +7492,7 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
{
signed char current_rounding_mode = status->float_rounding_mode;
set_float_rounding_mode(float_round_to_zero, status);
int64_t v = float64_to_uint64(a, status);
uint64_t v = float64_to_uint64(a, status);
set_float_rounding_mode(current_rounding_mode, status);
return v;
}

View File

@@ -5,7 +5,7 @@ common-obj-y = qemu-fsdev.o 9p-marshal.o 9p-iov-marshal.o
else
common-obj-y = qemu-fsdev-dummy.o
endif
common-obj-y += qemu-fsdev-opts.o
common-obj-y += qemu-fsdev-opts.o qemu-fsdev-throttle.o
# Toplevel always builds this; targets without virtio will put it in
# common-obj-y

View File

@@ -17,6 +17,7 @@
#include <dirent.h>
#include <utime.h>
#include <sys/vfs.h>
#include "qemu-fsdev-throttle.h"
#define SM_LOCAL_MODE_BITS 0600
#define SM_LOCAL_DIR_MODE_BITS 0700
@@ -74,6 +75,7 @@ typedef struct FsDriverEntry {
char *path;
int export_flags;
FileOperations *ops;
FsThrottle fst;
} FsDriverEntry;
typedef struct FsContext
@@ -83,6 +85,7 @@ typedef struct FsContext
int export_flags;
struct xattr_operations **xops;
struct extended_ops exops;
FsThrottle *fst;
/* fs driver specific data */
void *private;
} FsContext;

View File

@@ -9,6 +9,7 @@
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/module.h"
#include "qemu/throttle-options.h"
static QemuOptsList qemu_fsdev_opts = {
.name = "fsdev",
@@ -39,6 +40,8 @@ static QemuOptsList qemu_fsdev_opts = {
.type = QEMU_OPT_NUMBER,
},
THROTTLE_OPTS,
{ /*End of list */ }
},
};

118
fsdev/qemu-fsdev-throttle.c Normal file
View File

@@ -0,0 +1,118 @@
/*
* Fsdev Throttle
*
* Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
*
* Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*
* See the COPYING file in the top-level directory for details.
*
*/
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu-fsdev-throttle.h"
#include "qemu/iov.h"
static void fsdev_throttle_read_timer_cb(void *opaque)
{
FsThrottle *fst = opaque;
qemu_co_enter_next(&fst->throttled_reqs[false]);
}
static void fsdev_throttle_write_timer_cb(void *opaque)
{
FsThrottle *fst = opaque;
qemu_co_enter_next(&fst->throttled_reqs[true]);
}
void fsdev_throttle_parse_opts(QemuOpts *opts, FsThrottle *fst, Error **errp)
{
throttle_config_init(&fst->cfg);
fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.bps-total", 0);
fst->cfg.buckets[THROTTLE_BPS_READ].avg =
qemu_opt_get_number(opts, "throttling.bps-read", 0);
fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
qemu_opt_get_number(opts, "throttling.bps-write", 0);
fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.iops-total", 0);
fst->cfg.buckets[THROTTLE_OPS_READ].avg =
qemu_opt_get_number(opts, "throttling.iops-read", 0);
fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
qemu_opt_get_number(opts, "throttling.iops-write", 0);
fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
fst->cfg.buckets[THROTTLE_BPS_READ].max =
qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
fst->cfg.buckets[THROTTLE_OPS_READ].max =
qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
fst->cfg.buckets[THROTTLE_BPS_READ].burst_length =
qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
fst->cfg.op_size =
qemu_opt_get_number(opts, "throttling.iops-size", 0);
throttle_is_valid(&fst->cfg, errp);
}
void fsdev_throttle_init(FsThrottle *fst)
{
if (throttle_enabled(&fst->cfg)) {
throttle_init(&fst->ts);
throttle_timers_init(&fst->tt,
qemu_get_aio_context(),
QEMU_CLOCK_REALTIME,
fsdev_throttle_read_timer_cb,
fsdev_throttle_write_timer_cb,
fst);
throttle_config(&fst->ts, &fst->tt, &fst->cfg);
qemu_co_queue_init(&fst->throttled_reqs[0]);
qemu_co_queue_init(&fst->throttled_reqs[1]);
}
}
void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write,
struct iovec *iov, int iovcnt)
{
if (throttle_enabled(&fst->cfg)) {
if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) ||
!qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL);
}
throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt));
if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) &&
!throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) {
qemu_co_queue_next(&fst->throttled_reqs[is_write]);
}
}
}
void fsdev_throttle_cleanup(FsThrottle *fst)
{
if (throttle_enabled(&fst->cfg)) {
throttle_timers_destroy(&fst->tt);
}
}

View File

@@ -0,0 +1,39 @@
/*
* Fsdev Throttle
*
* Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
*
* Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*
* See the COPYING file in the top-level directory for details.
*
*/
#ifndef _FSDEV_THROTTLE_H
#define _FSDEV_THROTTLE_H
#include "block/aio.h"
#include "qemu/main-loop.h"
#include "qemu/coroutine.h"
#include "qapi/error.h"
#include "qemu/throttle.h"
typedef struct FsThrottle {
ThrottleState ts;
ThrottleTimers tt;
ThrottleConfig cfg;
CoQueue throttled_reqs[2];
} FsThrottle;
void fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **);
void fsdev_throttle_init(FsThrottle *);
void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool ,
struct iovec *, int);
void fsdev_throttle_cleanup(FsThrottle *);
#endif /* _FSDEV_THROTTLE_H */

View File

@@ -801,6 +801,20 @@ STEXI
Show information about hotpluggable CPUs
ETEXI
STEXI
@item info vm-generation-id
@findex vm-generation-id
Show Virtual Machine Generation ID
ETEXI
{
.name = "vm-generation-id",
.args_type = "",
.params = "",
.help = "Show Virtual Machine Generation ID",
.cmd = hmp_info_vm_generation_id,
},
STEXI
@end table
ETEXI

44
hmp.c
View File

@@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
const char* device = qdict_get_str(qdict, "device");
const char* command = qdict_get_str(qdict, "command");
Error *err = NULL;
int ret;
blk = blk_by_name(device);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
if (bs) {
blk = local_blk = blk_new();
blk_insert_bs(blk, bs);
blk = local_blk = blk_new(0, BLK_PERM_ALL);
ret = blk_insert_bs(blk, bs, &err);
if (ret < 0) {
goto fail;
}
} else {
goto fail;
}
@@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
/*
* Notably absent: Proper permission management. This is sad, but it seems
* almost impossible to achieve without changing the semantics and thereby
* limiting the use cases of the qemu-io HMP command.
*
* In an ideal world we would unconditionally create a new BlockBackend for
* qemuio_command(), but we have commands like 'reopen' and want them to
* take effect on the exact BlockBackend whose name the user passed instead
* of just on a temporary copy of it.
*
* Another problem is that deleting the temporary BlockBackend involves
* draining all requests on it first, but some qemu-iotests cases want to
* issue multiple aio_read/write requests and expect them to complete in
* the background while the monitor has already returned.
*
* This is also what prevents us from saving the original permissions and
* restoring them later: We can't revoke permissions until all requests
* have completed, and we don't know when that is nor can we really let
* anything else run before we have revoken them to avoid race conditions.
*
* What happens now is that command() in qemu-io-cmds.c can extend the
* permissions if necessary for the qemu-io command. And they simply stay
* extended, possibly resulting in a read-only guest device keeping write
* permissions. Ugly, but it appears to be the lesser evil.
*/
qemuio_command(blk, command);
aio_context_release(aio_context);
@@ -2576,3 +2605,14 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
qapi_free_HotpluggableCPUList(saved);
}
void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
GuidInfo *info = qmp_query_vm_generation_id(&err);
if (info) {
monitor_printf(mon, "%s\n", info->guid);
}
hmp_handle_error(mon, &err);
qapi_free_GuidInfo(info);
}

1
hmp.h
View File

@@ -137,5 +137,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict);
void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict);
void hmp_info_dump(Monitor *mon, const QDict *qdict);
void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict);
void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict);
#endif

File diff suppressed because it is too large Load Diff

20
hw/9pfs/9p-local.h Normal file
View File

@@ -0,0 +1,20 @@
/*
* 9p local backend utilities
*
* Copyright IBM, Corp. 2017
*
* Authors:
* Greg Kurz <groug@kaod.org>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_9P_LOCAL_H
#define QEMU_9P_LOCAL_H
int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
mode_t mode);
int local_opendir_nofollow(FsContext *fs_ctx, const char *path);
#endif

View File

@@ -25,13 +25,7 @@
static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
char *buffer;
ssize_t ret;
buffer = rpath(ctx, path);
ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size);
g_free(buffer);
return ret;
return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size);
}
static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
@@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
char *buffer;
int ret;
buffer = rpath(ctx, path);
ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags);
g_free(buffer);
return ret;
return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size,
flags);
}
static int mp_pacl_removexattr(FsContext *ctx,
const char *path, const char *name)
{
int ret;
char *buffer;
buffer = rpath(ctx, path);
ret = lremovexattr(buffer, MAP_ACL_ACCESS);
ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS);
if (ret == -1 && errno == ENODATA) {
/*
* We don't get ENODATA error when trying to remove a
@@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx,
errno = 0;
ret = 0;
}
g_free(buffer);
return ret;
}
static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path,
const char *name, void *value, size_t size)
{
char *buffer;
ssize_t ret;
buffer = rpath(ctx, path);
ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size);
g_free(buffer);
return ret;
return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size);
}
static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
@@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name,
void *value, size_t size, int flags)
{
char *buffer;
int ret;
buffer = rpath(ctx, path);
ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags);
g_free(buffer);
return ret;
return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size,
flags);
}
static int mp_dacl_removexattr(FsContext *ctx,
const char *path, const char *name)
{
int ret;
char *buffer;
buffer = rpath(ctx, path);
ret = lremovexattr(buffer, MAP_ACL_DEFAULT);
ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT);
if (ret == -1 && errno == ENODATA) {
/*
* We don't get ENODATA error when trying to remove a
@@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx,
errno = 0;
ret = 0;
}
g_free(buffer);
return ret;
}

Some files were not shown because too many files have changed in this diff Show More