Compare commits

...

716 Commits

Author SHA1 Message Date
Gonglei
3e10c3ecfc vnc: fix qemu crash because of SIGSEGV
The backtrace is:

0x00007f0b75cdf880 in pixman_image_get_stride () from /lib64/libpixman-1.so.0
0x00007f0b77bcb3cf in vnc_server_fb_stride (vd=0x7f0b7a1a2bb0) at ui/vnc.c:680
vnc_dpy_copy (dcl=0x7f0b7a1a2c00, src_x=224, src_y=263, dst_x=319, dst_y=363, w=1, h=1) at ui/vnc.c:915
0x00007f0b77bbcc35 in dpy_gfx_copy (con=0x7f0b7a146210, src_x=src_x@entry=224, src_y=src_y@entry=263, dst_x=dst_x@entry=319,
dst_y=dst_y@entry=363, w=1, h=1) at ui/console.c:1575
0x00007f0b77bbda4e in qemu_console_copy (con=<optimized out>, src_x=src_x@entry=224, src_y=src_y@entry=263, dst_x=dst_x@entry=319,
dst_y=dst_y@entry=363, w=<optimized out>, h=<optimized out>) at ui/console.c:2111
0x00007f0b77ac0980 in cirrus_do_copy (h=<optimized out>, w=<optimized out>, src=<optimized out>, dst=<optimized out>, s=0x7f0b7b086090) at hw/display/cirrus_vga.c:774
cirrus_bitblt_videotovideo_copy (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:793
cirrus_bitblt_videotovideo (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:915
cirrus_bitblt_start (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:1056
0x00007f0b77965cfb in memory_region_write_accessor (mr=0x7f0b7b096e40, addr=320, value=<optimized out>, size=1, shift=<optimized out>,mask=<optimized out>, attrs=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:525
0x00007f0b77963f59 in access_with_adjusted_size (addr=addr@entry=320, value=value@entry=0x7f0b69a268d8, size=size@entry=4,
access_size_min=<optimized out>, access_size_max=<optimized out>, access=access@entry=0x7f0b77965c80 <memory_region_write_accessor>,
mr=mr@entry=0x7f0b7b096e40, attrs=attrs@entry=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:591
0x00007f0b77968315 in memory_region_dispatch_write (mr=mr@entry=0x7f0b7b096e40, addr=addr@entry=320, data=18446744073709551362,
size=size@entry=4, attrs=attrs@entry=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:1262
0x00007f0b779256a9 in address_space_write_continue (mr=0x7f0b7b096e40, l=4, addr1=320, len=4, buf=0x7f0b77713028 "\002\377\377\377",
attrs=..., addr=4273930560, as=0x7f0b7827d280 <address_space_memory>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2544
address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2601
0x00007f0b77925c1d in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=..., attrs@entry=...,
buf=buf@entry=0x7f0b77713028 "\002\377\377\377", len=<optimized out>, is_write=<optimized out>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2703
0x00007f0b77962f53 in kvm_cpu_exec (cpu=cpu@entry=0x7f0b79fcc2d0) at /root/rpmbuild/BUILD/master/qemu/kvm-all.c:1965
0x00007f0b77950cc6 in qemu_kvm_cpu_thread_fn (arg=0x7f0b79fcc2d0) at /root/rpmbuild/BUILD/master/qemu/cpus.c:1078
0x00007f0b744b3dc5 in start_thread (arg=0x7f0b69a27700) at pthread_create.c:308
0x00007f0b70d3d66d in clone () from /lib64/libc.so.6

The code path while meeting segfault:
 vnc_dpy_copy
   vnc_update_client
     vnc_disconnect_finish [while vnc_disconnect_start() is invoked because somethins wrong]
       vnc_update_server_surface
         vd->server = NULL;
   vnc_server_fb_stride
     pixman_image_get_stride(vd->server)

Let's add a non-NULL check before calling vnc_server_fb_stride() to avoid segmentation fault.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Daniel P. Berrange <berrange@redhat.com>
Reported-by: Yanying Zhuang <ann.zhuangyanying@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1472788698-120964-1-git-send-email-arei.gonglei@huawei.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Li Zhijian
93ca519ec4 qemu-options.hx: correct spice options streaming-video default document value to 'off'
since f1d3e58, the code had changed the default value to 'off', so this patch
make document and code are consistent.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Message-id: 1470024419-10886-1-git-send-email-lizhijian@cn.fujitsu.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Peter Maydell
99a9ef44dc ui/curses.c: Clean up nextchr logic
Coverity identifies that at the top of the while(1) loop
in curses_refresh() the variable nextchr is always ERR,
and so the else case of the first if() is dead code.
Remove this dead code, and narrow the scope of the
nextchr variable to the place where it's used.

(This confused logic has been present since the curses
code was added to QEMU in 2008.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1470925407-23850-3-git-send-email-peter.maydell@linaro.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Peter Maydell
bba4e1b591 ui/curses.c: Ensure we don't read off the end of curses2qemu array
Coverity spots that there is no bounds check before we
access the curses2qemu[] array.  Add one, bringing this
code path into line with the one that looks up entries
in curses2keysym[].

In theory getch() shouldn't return out of range keycodes,
but it's better not to assume this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1470925407-23850-2-git-send-email-peter.maydell@linaro.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Peter Maydell
7263da7804 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Mon 12 Sep 2016 11:51:09 BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to c5542f2 built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-12 15:09:47 +01:00
Peter Maydell
d4c61988b8 Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2016-09-12-1' into staging
Merge qcrypto 2016/09/12 v1

# gpg: Signature made Mon 12 Sep 2016 12:02:20 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2016-09-12-1:
  crypto: report enum strings instead of values in errors
  crypto: fix building complaint
  crypto: ensure XTS is only used with ciphers with 16 byte blocks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-12 12:48:47 +01:00
Daniel P. Berrange
90d6f60d07 crypto: report enum strings instead of values in errors
Several error messages print out the raw enum value, which
is less than helpful to users, as these values are not
documented, nor stable across QEMU releases. Switch to use
the enum string instead.

The nettle impl also had two typos where it mistakenly
said "algorithm" instead of "mode", and actually reported
the algorithm value too.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-09-12 12:00:52 +01:00
Gonglei
d9269b274a crypto: fix building complaint
gnutls commit 846753877d renamed LIBGNUTLS_VERSION_NUMBER to GNUTLS_VERSION_NUMBER.
If using gnutls before that verion, we'll get the below warning:
crypto/tlscredsx509.c:618:5: warning: "GNUTLS_VERSION_NUMBER" is not defined

Because gnutls 3.x still defines LIBGNUTLS_VERSION_NUMBER for back compat, Let's
use LIBGNUTLS_VERSION_NUMBER instead of GNUTLS_VERSION_NUMBER to fix building
complaint.

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-09-12 12:00:52 +01:00
Daniel P. Berrange
a5d2f44d0d crypto: ensure XTS is only used with ciphers with 16 byte blocks
The XTS cipher mode needs to be used with a cipher which has
a block size of 16 bytes. If a mis-matching block size is used,
the code will either corrupt memory beyond the IV array, or
not fully encrypt/decrypt the IV.

This fixes a memory corruption crash when attempting to use
cast5-128 with xts, since the former has an 8 byte block size.

A test case is added to ensure the cipher creation fails with
such an invalid combination.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-09-12 12:00:06 +01:00
Peter Maydell
c569c537e5 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio,vhost,pc: fixes and updates

balloon fixes wrt migration
virtio-vsock device support

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 09 Sep 2016 22:36:13 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  vhost-vsock: add virtio sockets device
  tests/acpi: speedup acpi tests
  virtio-pci: minor refactoring
  vhost: don't set vring call if no vector
  virtio-pci: error out when both legacy and modern modes are disabled
  virtio-balloon: fix stats vq migration
  virtio: add virtqueue_rewind()
  virtio-balloon: discard virtqueue element on reset
  virtio: zero vq->inuse in virtio_reset()
  virtio-pci: reduce modern_mem_bar size
  target-i386: present virtual L3 cache info for vcpus
  pc: Add 2.8 machine
  virtio-pci: use size from correct structure
  virtio: Tell the user what went wrong when event_notifier_init failed

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-12 11:25:40 +01:00
Mark Cave-Ayland
a26f7f2cb8 Update OpenBIOS images to c5542f2 built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-09-12 08:14:50 +01:00
Stefan Hajnoczi
fc0b9b0e1c vhost-vsock: add virtio sockets device
Implement the new virtio sockets device for host<->guest communication
using the Sockets API.  Most of the work is done in a vhost kernel
driver so that virtio-vsock can hook into the AF_VSOCK address family.
The QEMU vhost-vsock device handles configuration and live migration
while the rx/tx happens in the vhost_vsock.ko Linux kernel driver.

The vsock device must be given a CID (host-wide unique address):

  # qemu -device vhost-vsock-pci,id=vhost-vsock-pci0,guest-cid=3 ...

For more information see:
http://qemu-project.org/Features/VirtioVsock

[Endianness fixes and virtio-ccw support by Claudio Imbrenda
<imbrenda@linux.vnet.ibm.com>]

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
[mst: rebase to master]
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-10 00:28:08 +03:00
Marcel Apfelbaum
947b205fdb tests/acpi: speedup acpi tests
Use kvm acceleration if available.
Disable kernel-irqchip and use qemu64 cpu
for both kvm and tcg cases.

Using kvm acceleration saves about a second
and disabling kernel-irqchip has no visible
performance impact.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-10 00:08:28 +03:00
Michael S. Tsirkin
71d19fc513 virtio-pci: minor refactoring
!legacy && !modern is shorter than !(legacy || modern).
I also perfer this (less ()s) as a matter of taste.

Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Jason Wang
96a3d98d2c vhost: don't set vring call if no vector
We used to set vring call fd unconditionally even if guest driver does
not use MSIX for this vritqueue at all. This will cause lots of
unnecessary userspace access and other checks for drivers does not use
interrupt at all (e.g virtio-net pmd). So check and clean vring call
fd if guest does not use any vector for this virtqueue at
all.

Perf diffs (on rx) shows lots of cpus wasted on vhost_signal() were saved:

#
    28.12%  -27.82%  [vhost]           [k] vhost_signal
    14.44%   -1.69%  [kernel.vmlinux]  [k] copy_user_generic_string
     7.05%   +1.53%  [kernel.vmlinux]  [k] __free_page_frag
     6.51%   +5.53%  [vhost]           [k] vhost_get_vq_desc
...

Pktgen tests shows 15.8% improvement on rx pps and 6.5% on tx pps.

Before: RX 2.08Mpps TX 1.35Mpps
After:  RX 2.41Mpps TX 1.44Mpps

Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Greg Kurz
3eff376977 virtio-pci: error out when both legacy and modern modes are disabled
Without presuming if we got there because of a user mistake or some
more subtle bug in the tooling, it really does not make sense to
implement a non-functional device.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Ladi Prosek
4a1e48beca virtio-balloon: fix stats vq migration
The statistics virtqueue is not migrated properly because virtio-balloon
does not include s->stats_vq_elem in the migration stream.

After migration the statistics virtqueue hangs because the host never
completes the last element (s->stats_vq_elem is NULL on the destination
QEMU).  Therefore the guest never submits new elements and the virtqueue
is hung.

Instead of changing the migration stream format in an incompatible way,
detect the migration case and rewind the virtqueue so the last element
can be completed.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Suggested-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Stefan Hajnoczi
297a75e6c5 virtio: add virtqueue_rewind()
virtqueue_discard() requires a VirtQueueElement but virtio-balloon does
not migrate its in-use element.  Introduce a new function that is
similar to virtqueue_discard() but doesn't require a VirtQueueElement.

This will allow virtio-balloon to access element again after migration
with the usual proviso that the guest may have modified the vring since
last time.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Ladi Prosek
104e70cae7 virtio-balloon: discard virtqueue element on reset
The one pending element is being freed but not discarded on device
reset, which causes svq->inuse to creep up, eventually hitting the
"Virtqueue size exceeded" error.

Properly discarding the element on device reset makes sure that its
buffers are unmapped and the inuse counter stays balanced.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Stefan Hajnoczi
4b7f91ed02 virtio: zero vq->inuse in virtio_reset()
vq->inuse must be zeroed upon device reset like most other virtqueue
fields.

In theory, virtio_reset() just needs assert(vq->inuse == 0) since
devices must clean up in-flight requests during reset (requests cannot
not be leaked!).

In practice, it is difficult to achieve vq->inuse == 0 across reset
because balloon, blk, 9p, etc implement various different strategies for
cleaning up requests.  Most devices call g_free(elem) directly without
telling virtio.c that the VirtQueueElement is cleaned up.  Therefore
vq->inuse is not decremented during reset.

This patch zeroes vq->inuse and trusts that devices are not leaking
VirtQueueElements across reset.

I will send a follow-up series that refactors request life-cycle across
all devices and converts vq->inuse = 0 into assert(vq->inuse == 0) but
this more invasive approach is not appropriate for stable trees.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Cc: qemu-stable <qemu-stable@nongnu.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Ladi Prosek <lprosek@redhat.com>
2016-09-09 20:58:34 +03:00
Marcel Apfelbaum
d9997d89a4 virtio-pci: reduce modern_mem_bar size
Currently each VQ Notification Virtio Capability is allocated
on a different page. The idea is to enable split drivers within
guests, however there are no known plans to do that.
The allocation will result in a 8MB BAR, more than various
guest firmwares pre-allocates for PCI Bridges hotplug process.

Reserve 4 bytes per VQ by default and add a new parameter
"page-per-vq" to be used with split drivers.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Longpeng(Mike)
14c985cffa target-i386: present virtual L3 cache info for vcpus
Some software algorithms are based on the hardware's cache info, for example,
for x86 linux kernel, when cpu1 want to wakeup a task on cpu2, cpu1 will trigger
a resched IPI and told cpu2 to do the wakeup if they don't share low level
cache. Oppositely, cpu1 will access cpu2's runqueue directly if they share llc.
The relevant linux-kernel code as bellow:

	static void ttwu_queue(struct task_struct *p, int cpu)
	{
		struct rq *rq = cpu_rq(cpu);
		......
		if (... && !cpus_share_cache(smp_processor_id(), cpu)) {
			......
			ttwu_queue_remote(p, cpu); /* will trigger RES IPI */
			return;
		}
		......
		ttwu_do_activate(rq, p, 0); /* access target's rq directly */
		......
	}

In real hardware, the cpus on the same socket share L3 cache, so one won't
trigger a resched IPIs when wakeup a task on others. But QEMU doesn't present a
virtual L3 cache info for VM, then the linux guest will trigger lots of RES IPIs
under some workloads even if the virtual cpus belongs to the same virtual socket.

For KVM, there will be lots of vmexit due to guest send IPIs.
The workload is a SAP HANA's testsuite, we run it one round(about 40 minuates)
and observe the (Suse11sp3)Guest's amounts of RES IPIs which triggering during
the period:
        No-L3           With-L3(applied this patch)
cpu0:	363890		44582
cpu1:	373405		43109
cpu2:	340783		43797
cpu3:	333854		43409
cpu4:	327170		40038
cpu5:	325491		39922
cpu6:	319129		42391
cpu7:	306480		41035
cpu8:	161139		32188
cpu9:	164649		31024
cpu10:	149823		30398
cpu11:	149823		32455
cpu12:	164830		35143
cpu13:	172269		35805
cpu14:	179979		33898
cpu15:	194505		32754
avg:	268963.6	40129.8

The VM's topology is "1*socket 8*cores 2*threads".
After present virtual L3 cache info for VM, the amounts of RES IPIs in guest
reduce 85%.

For KVM, vcpus send IPIs will cause vmexit which is expensive, so it can cause
severe performance degradation. We had tested the overall system performance if
vcpus actually run on sparate physical socket. With L3 cache, the performance
improves 7.2%~33.1%(avg:15.7%).

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Longpeng(Mike)
a4d3c83476 pc: Add 2.8 machine
This will used by the next patch.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Michael S. Tsirkin
e3aab6c7f3 virtio-pci: use size from correct structure
PIO MR registration should use size from the correct notify struct.
Doesn't affect any visible behaviour because the field values are the
same (both are 4).

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Thomas Huth
a8bba0ada4 virtio: Tell the user what went wrong when event_notifier_init failed
event_notifier_init() can fail in real life, for example when there
are not enough open file handles available (EMFILE) when using a lot
of devices. So instead of leaving the average user with a cryptic
error number only, print out a proper error message with strerror()
instead, so that the user has a better way to figure out what is
going on and that using "ulimit -n" might help here for example.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Peter Maydell
c2a57aae9a Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Fri 09 Sep 2016 05:54:35 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  docker: silence debootstrap when --quiet is given
  docker: build debootstrap after cloning
  docker: make sure debootstrap is at least 1.0.67
  docker: print warning if EXECUTABLE is not set when building debootstrap image
  docker: debian-bootstrap.pre: print helpful message if DEB_ARCH/DEB_TYPE unset
  docker: debian-bootstrap.pre: print error messages to stderr
  docker: avoid dependency on 'realpath' package
  docker.py: don't hang on large docker output
  docker: Add a glib2-2.22 image

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-09 12:49:41 +01:00
Peter Maydell
5f31bbf101 qtest.c: Allow zero size in memset qtest commands
Some tests use the qtest protocol "memset" command with a zero
size, expecting it to do nothing. However in the current code this
will result in calling memset() with a NULL pointer, which is
undefined behaviour. Detect and specially handle zero sizes to
avoid this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1470393800-7882-1-git-send-email-peter.maydell@linaro.org
2016-09-09 11:16:18 +01:00
Peter Maydell
33e60e0198 Merge remote-tracking branch 'remotes/elmarco/tags/leak-pull-request' into staging
Pull request

v2:
- dropped "tests: fix small leak in test-io-channel-command" that Daniel Berrange will pick
- fixed "tests: add qtest_add_data_func_full" to work with glib < 2.26

# gpg: Signature made Thu 08 Sep 2016 15:16:54 BST
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/leak-pull-request: (25 commits)
  tests: fix postcopy-test leaks
  tests: fix rsp leak in postcopy-test
  tests: pc-cpu-test leaks fixes
  tests: add qtest_add_data_func_full
  bus: simplify name handling
  ipmi: free extern timer
  sd: free timer
  pc: keep gsi reference
  pc: free i8259
  tests: fix qom-test leaks
  acpi-build: fix array leak
  machine: use class base init generated name
  pc: don't leak a20_line
  pc: simplify passing qemu_irq
  portio: keep references on portio
  tests: fix leak in test-string-input-visitor
  tests: fix check-qom-proplist leaks
  tests: fix check-qom-interface leaks
  tests: fix test-iov leaks
  tests: fix test-vmstate leaks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-08 15:22:50 +01:00
Marc-André Lureau
e2dd21e510 tests: fix postcopy-test leaks
A few strings are allocated and never freed.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
5b1ded224f tests: fix rsp leak in postcopy-test
In all cases, even when the dict doesn't contain 'ram', the qmp response
must be unref.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
34e46f604d tests: pc-cpu-test leaks fixes
The path is allocated and should be freed.

The qmp response should be unref, but then 'machine' must be duplicated.

Use a destroy function for the PCTestData.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
822e36ca35 tests: add qtest_add_data_func_full
Allows one to specify a destroy function for the test data.

Add a fallback using glib g_test_add_vtable() internal function, whose
signature changed over time. Tested with glib 2.22, 2.26 and 2.48, which
according to git log should be enough to cover all variations.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
f73480c36f bus: simplify name handling
Simplify a bit the code by using g_strdup_printf() and store it in a
non-const value so casting is no longer needed, and ownership is
clearer.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
e9529768d4 ipmi: free extern timer
Free the timer allocated during instance init.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Corey Minyard <cminyard@mvista.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
5ba344013c sd: free timer
Free the timer allocated in instance_init.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
3e6c0c4c2c pc: keep gsi reference
Further cleanup would need to call qemu_free_irq() at the appropriate
time, but for now this silences ASAN about direct leaks.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
8197e24c38 pc: free i8259
Simiarly to 2ba154cf4e

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
ff1685a333 tests: fix qom-test leaks
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
354fb471bd acpi-build: fix array leak
The free_ranges array is used as a temporary pointer array, the segment
should still be freed, however, it shouldn't free the elements themself.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
8ea753718b machine: use class base init generated name
machine_class_base_init() member name is allocated by
machine_class_base_init(), but not freed by
machine_class_finalize().  Simply freeing there doesn't work,
because DEFINE_PC_MACHINE() overwrites it with a literal string.

Fix DEFINE_PC_MACHINE() not to overwrite it, and add the missing
free to machine_class_finalize().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
ac64c5fdf8 pc: don't leak a20_line
The irqs array is no longer being used

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
d80fe99de4 pc: simplify passing qemu_irq
qemu_irq is already a pointer, no need to have an extra pointer level.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
e305a16510 portio: keep references on portio
The isa_register_portio_list() function allocates ioports
data/state. Let's keep the reference to this data on some owner.  This
isn't enough to fix leaks, but at least, ASAN stops complaining of
direct leaks. Further cleanup would require calling
portio_list_del/destroy().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
bd794065ff tests: fix leak in test-string-input-visitor
Free the list returned by visit_type_intList().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
3972a4884d tests: fix check-qom-proplist leaks
Found thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
265804b5d7 tests: fix check-qom-interface leaks
Found thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
d55f295b2b tests: fix test-iov leaks
Spotted thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
4ae3c0e27f tests: fix test-vmstate leaks
Spotted thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
d6f723b513 tests: fix test-cutils leaks
Spotted thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
3e3e302ff3 qga: free remaining leaking state
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
2aa67a9196 qga: free the whole blacklist
Free the config blacklist list, not just the elements. Do it so in the
more appropriate function config_free().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
5c7e3e9fb1 glib-compat: add g_(s)list_free_full()
Those functions are only available since glib 2.28.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
1e2713384c tests: fix test-qga leaks
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Sascha Silbe
f8042deafa docker: silence debootstrap when --quiet is given
If we silence docker when --quiet is given, we should also silence the
.pre script (i.e. debootstrap).

Only discards stdout, so some diagnostics (e.g. from git clone) are
still printed. Most of the verbose output is gone however and this way
we still have a chance to see error messages.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-9-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
ae2f659ca5 docker: build debootstrap after cloning
When using the git version of debootstrap (because no usable version
of debootstrap was installed on the host), we need to run 'make' so
that devices.tar.gz gets built. Otherwise the first debootstrap stage
will fail without printing any error message.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-8-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
00263139f8 docker: make sure debootstrap is at least 1.0.67
debootstrap prior to 1.0.67 generated an empty sources.list during
foreign bootstraps (Debian#732255 [1]). Fall back to the git checkout
if the installed debootstrap version is too old.

[1] https://bugs.debian.org/732255

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-7-git-send-email-silbe@linux.vnet.ibm.com>
[Update 'sort -C' to 'sorc -c &>/dev/null' - Fam]
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
a351b4b06e docker: print warning if EXECUTABLE is not set when building debootstrap image
Building the debian-debootstrap image will usually fail if EXECUTABLE
isn't set (when using the Makefile). Warn the user in this case so
they know why it's failing.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-6-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
341edc0c47 docker: debian-bootstrap.pre: print helpful message if DEB_ARCH/DEB_TYPE unset
The debian-bootstrap image doesn't choose a default architecture and
distribution version, instead the user has to set both DEB_ARCH and
DEB_TYPE in the environment. Print a reasonably helpful message if
either of them isn't set instead of complaining about "qemu-" being
missing or erroring out because we cannot cd to the mirror URL.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-5-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
b5dc88ce24 docker: debian-bootstrap.pre: print error messages to stderr
Send error messages where they belong so they're seen even if stdout
is redirected to /dev/null.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-4-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
08f4e8d23d docker: avoid dependency on 'realpath' package
The 'realpath' executable is shipped in a separate package that isn't
installed by default on some distros.

We already use 'readlink -e' (provided by GNU coreutils) in some other
part of the code, so let's settle for that instead.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-3-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
c977257045 docker.py: don't hang on large docker output
Unlike Popen.communicate(), subprocess.call() doesn't read from the
stdout file descriptor. If the child process produces more output than
fits into the pipe buffer, it will block indefinitely.

If we don't intend to consume the output, just send it straight to
/dev/null to avoid this issue.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Message-Id: <1473192351-601-2-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Fam Zheng
9af4c174a3 docker: Add a glib2-2.22 image
It's a variation of our existing centos6, plus two more lines to
downgrade glib2 to version 2.22 which we download from vault.centos.org.

Suggested-by: Paolo Bonzini <pbonzoni@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470708908-12885-1-git-send-email-famz@redhat.com>
2016-09-08 19:56:34 +08:00
Peter Maydell
59351d9b40 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.8-20160907' into staging
ppc patch queue for 2016-Sep-7

This is my first pull request for the newly opened qemu-2.8 tree.  It
contains a heap of things that were too late for 2.7 and have been
queued for a while.  In particular:
    * A number of preliminary patches for the powernv machine type
        * A substantial cleanup of exception handling which will be
          necessary to support running a TCG with hypervisor
          facilities
    * A start on support for POWER9
        * Some TCG implementations for new POWER9 instructions
        * Some TCG and related cleanups in preparation for POWER9
    * Some assorted TCG optimizations
    * An implementation of the H_CHANGE_LOGICAL_LAN_MAC hypercall
      which allows the MAC address to be changed on the PAPR virtual
      NIC.
    * Add some extra test cases for several machines (this isn't
      strictly in the ppc code, but is most value to ppc)

NOTE: This pull request supersedes ppc-for-2.8-20160906, which had
some problems.  Changes:
  * Dropped BenH's lmw/stmw speedups, which break for
    qemu-system-ppc64 on BE hosts
  * A small fix to Thomas' serial output test to avoid a warning on
    the isapc machine type.
  * Some trivial checkpatch fixes

Note that some of the patches in this series still have large numbers
of checkpatch warnings.  This is because they're moving existing code
that predates most of the checkpatch style conventions.

# gpg: Signature made Wed 07 Sep 2016 07:09:27 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.8-20160907: (64 commits)
  tests: Check serial output of firmware boot of some machines
  tests: Resort check-qtest entries in Makefile.include
  spapr: implement H_CHANGE_LOGICAL_LAN_MAC h_call
  ppc: Improve a few more helper flags
  ppc: Improve the exception helpers flags
  ppc: Improve flags for helpers loading/writing the time facilities
  ppc: Don't generate dead code on unconditional branches
  ppc: Stop dumping state on all exceptions in linux-user
  ppc: Fix catching some segfaults in user mode
  ppc: Fix macio ESCC legacy mapping
  hw/ppc: add a ppc_create_page_sizes_prop() helper routine
  hw/ppc: use error_report instead of fprintf
  ppc: Rename #include'd .c files to .inc.c
  target-ppc: add extswsli[.] instruction
  target-ppc: add vsrv instruction
  target-ppc: add vslv instruction
  target-ppc: add vcmpnez[b,h,w][.] instructions
  target-ppc: add vabsdu[b,h,w] instructions
  target-ppc: add dtstsfi[q] instructions
  target-ppc: implement branch-less divd[o][.]
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-08 11:28:12 +01:00
Peter Maydell
0813cbf913 tests/hd-geo-test: Don't pass NULL to unlink()
The unlink() function doesn't accept a NULL pointer, so
don't pass it one. Spotted by the clang sanitizer.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1470391392-28274-1-git-send-email-peter.maydell@linaro.org
2016-09-08 10:43:58 +01:00
Thomas Huth
d2ab58ffc9 tests: Check serial output of firmware boot of some machines
Some of the machines that we have got a firmware image for write
some output to the serial console while booting up. We can use
this output to make sure that the machine is basically working,
so this adds a test that checks the output of these machines
for some well-known "magic" strings.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:13 +10:00
Thomas Huth
29531542bc tests: Resort check-qtest entries in Makefile.include
The rather random list of check-qtest-xxx entries caused some
confusion in the past, where to use "=" and where to use "+="
(see commits 0ccac16f59 and 1f5c1cfbae
for example).
Sorting the check-qtest-xxx entries by architecure instead and
using some empty lines inbetween should help to ease this
situation a little bit, so that it is hopefully now obvious
that new tests should be added with "+=" instead of "=".
While we are at it, this patch also comments out two of the
"gcov-files-..." lines since the corresponding m48t59-test is
disabled for sparc and sparc64, too.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:13 +10:00
Laurent Vivier
32f5f50dad spapr: implement H_CHANGE_LOGICAL_LAN_MAC h_call
Since kernel v4.0, linux uses H_CHANGE_LOGICAL_LAN_MAC to change lively
the MAC address of an ibmveth interface.

As QEMU doesn't implement this h_call, we can't change anymore the
MAC address of an spapr-vlan interface.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:13 +10:00
Benjamin Herrenschmidt
a007b19b37 ppc: Improve a few more helper flags
Mostly turn "store" type of helpers into TCG_CALL_NO_WG because
they can take exceptions. Also fixup_thrm doesn't read nor write
the tracked environment.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
76a3d2f750 ppc: Improve the exception helpers flags
They generate exceptions, but they don't update the environment

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
d0f6ced17f ppc: Improve flags for helpers loading/writing the time facilities
Those helpers never load from or store to the TCG tracked environment,
not do they generate synchronous exceptions (they might generate an
asynchronous interrupt but that's not an issue here).

So we can make them all use TCG_CALL_NO_RWG

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
accc60c47c ppc: Don't generate dead code on unconditional branches
We are always generating the "else" case of the condition even when
generating an unconditional branch that will never hit it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
4f5d326046 ppc: Stop dumping state on all exceptions in linux-user
Other archs don't do it, some programs catch signals just fine
and those dumps just clutter the output. Keep the dumps for cases
that aren't supposed to happen such as unknown codes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
ba4a8df83f ppc: Fix catching some segfaults in user mode
The usermode "translate" code generates an error code value that
has the "is_write" bit set, which causes our switch/case to miss
and display "Invalid segfault errno" and a spurrious second state
dump. Fix it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
dd2fa4f72d ppc: Fix macio ESCC legacy mapping
The current mapping, while correct for the base ports (which is all the
driver uses these days), is wrong for the extended registers.

I suspect the bugs come from incorrect tables in the CHRP IO Ref document,
I have verified the new values here match Apple's MacTech.pdf.

Note: Nothing that I know of actually uses these registers so it's not a
huge deal, but this patch has the added advantage of adding comments to
document what the registers are.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Cédric Le Goater
3654fa95bc hw/ppc: add a ppc_create_page_sizes_prop() helper routine
The exact same routine will be used in PowerNV.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Cédric Le Goater
ce9863b797 hw/ppc: use error_report instead of fprintf
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
15848410af ppc: Rename #include'd .c files to .inc.c
Also while at it, group the #include statements in translate.c

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Nikunj A Dadhania
787bbe3711 target-ppc: add extswsli[.] instruction
extswsli : Extend Sign Word & Shift Left Immediate

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Vivek Andrew Sha
4004c1dbca target-ppc: add vsrv instruction
Adds Vector Shift Right Variable instruction.

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
[ reverse the order of computation to avoid temporary array ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Vivek Andrew Sha
5644a17567 target-ppc: add vslv instruction
vslv: Vector Shift Left Variable

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Swapnil Bokade
f7cc8466f1 target-ppc: add vcmpnez[b,h,w][.] instructions
Adds following instructions:

vcmpnezb[.]: Vector Compare Not Equal or Zero Byte
vcmpnezh[.]: Vector Compare Not Equal or Zero Halfword
vcmpnezw[.]: Vector Compare Not Equal or Zero Word

Signed-off-by: Swapnil Bokade <bokadeswapnil@gmail.com>
[ collapse switch case ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Sandipan Das
377070595a target-ppc: add vabsdu[b,h,w] instructions
Adds following instructions:

vabsdub: Vector Absolute Difference Unsigned Byte
vabsduh: Vector Absolute Difference Unsigned Halfword
vabsduw: Vector Absolute Difference Unsigned Word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ use ISA300 define. Drop etype ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Sandipan Das
217f6b8805 target-ppc: add dtstsfi[q] instructions
DFP Test Significance Immediate [Quad]

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Nikunj A Dadhania
4110b586de target-ppc: implement branch-less divd[o][.]
Similar to divw, implement branch-less divd.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Nikunj A Dadhania
b07c32dc4b target-ppc: implement branch-less divw[o][.]
While implementing modulo instructions figured out that the
implementation uses many branches. Change the logic to achieve the
branch-less code. Undefined value is set to dividend in case of invalid
input.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
5817355ed0 ppc: load/store multiple and string insns don't do LE
Just generate an alignment interrupt

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
65f2475f1f ppc: Use a helper to generate "LE unsupported" alignment interrupts
Some operations aren't allowed in LE mode, use a helper rather than
open coding the exception generation.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
5f2a625452 ppc: Don't set access_type on all load/stores on hash64
We don't use it so let's not generate the updates.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
fbc3b39b39 ppc: Fix CFAR updates
We were one instruction off

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
c9f82d013b ppc: Speed up dcbz
Use tlb_vaddr_to_host to do a fast path single translate for
the whole cache line. Also make the reservation check match
the entire range.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
22b56ee568 ppc: Handle unconditional (always/never) traps at translation time
We don't need to call a helper for trap always and trap never
which are used by Linux under some circumstances.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
--

v2. Don't generate the helper call when trapping always
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
3433b732a4 ppc: Make alignment exceptions suck less
The current alignment exception generation tries to load the opcode
to put in DSISR from a context where a cpu_ldl_code() is really not
a good idea. It might fault and longjmp out and that's not something
we want happening here.

Instead, pass the releavant opcode bits via the error_code.

There are a couple of cases of alignment interrupts that won't set
anything, the ones coming from access to direct store segments, but
that doesn't happen in practice, nobody used direct store segments
and they are gone from newer chips.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
b00a3b3648 ppc: Don't update NIP in dcbz and lscbx
Instead, pass GETPC() result to the corresponding helpers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
573708e329 ppc: Don't update NIP if not taking alignment exceptions
Move the NIP update to after the conditional branch so that we
don't do it if we aren't going to take the alignment exception

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
72073dcce0 ppc: Don't update NIP on conditional trap instructions
This is no longer necessary as the helpers will properly retrieve
the return address when needed.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
8c8966e218 ppc: Don't update NIP BookE 2.06 tlbwe
This is no longer necessary as the helpers will properly retrieve
the return address when needed.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
57a2988b6f ppc: Don't update NIP in facility unavailable interrupts
This is no longer necessary as the helpers will properly retrieve
the return address when needed. Also remove gen_update_current_nip()
which didn't seem to make much sense to me.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
a13f0a9bc4 ppc: Don't update NIP in DCR access routines
This is no longer necessary as the helpers will properly retrieve
the return address when needed

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
0f72b7c682 ppc: Fix source NIP on SLB related interrupts
We need to pass it to the raise helper since we don't update it
before the calls.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
bd6fefe71c ppc: Make tlb_fill() use new exception helper
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:03 +10:00
Benjamin Herrenschmidt
af6d376ea1 ppc: Don't update NIP in lmw/stmw/icbi
Instead, pass GETPC() result to the corresponding helpers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:38:00 +10:00
Benjamin Herrenschmidt
e41029b378 ppc: Don't update NIP in lswi/lswx/stswi/stswx
Instead, pass GETPC() result to the corresponding helpers. This
requires a bit of fiddling to get the PC (hopefully) right in
the case where we generate a program check, though the hacks there
are temporary, a subsequent patch will clean this all up by always
having the nip already set to the right instruction when taking
the fault.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[dwg: Fix trivial checkpatch warning]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:37:48 +10:00
Benjamin Herrenschmidt
1b7d17cae4 ppc: FP exceptions are always precise
We don't implement imprecise FP exceptions and using store_current
which sets SRR1 to the *previous* instruction never makes sense
for these. So let's be truthful and make them precise, which is
allowed by the architecture.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:47 +10:00
Benjamin Herrenschmidt
ef24726e48 ppc: Don't update the NIP in floating point generated code
This is no longer necessary as the helpers will properly retrieve
the return address.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:47 +10:00
Benjamin Herrenschmidt
44f35bd1ac ppc: Make float_check_status() pass the return address
Instead of relying on NIP having been updated already.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[dwg: Fold in fix to mark function always_inline]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:47 +10:00
Benjamin Herrenschmidt
a93ecff935 ppc: Make float_invalid_op_excp() pass the return address
Instead of relying on NIP having been updated already

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
f63fbc00d4 ppc: Rename fload_invalid_op_excp to float_invalid_op_excp
No other change

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
3014427af5 ppc: Move VSX ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
0304af897b ppc: Move VMX ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
8b25cdd371 ppc: Move DFP ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
4083de6b53 ppc: Move embedded spe ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
f96511215d ppc: Move classic fp ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
db789c6cd3 ppc: Provide basic raise_exception_* functions
Instead of using the same helpers called from translate.c, let's have
a bunch of functions that take the various argument combinations,
especially the retaddr which will be needed in subsequent patches,
and leave the helpers to be just that, helpers for translate.c

We don't yet convert all users, we'll go through them in subsequent
patches.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
--

v2. Fix raise_exception_ra() to properly pass raddr
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Nikunj A Dadhania
323ad19bcc target-ppc: introduce opc4 for Expanded Opcode
ISA 3.0 has introduced EO - Expanded Opcode. Introduce third level
indirect opcode table and corresponding parsing routines.

EO (11:12) Expanded opcode field
Formats: XX1

EO (11:15) Expanded opcode field
Formats: VX, X, XX2

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
[dwg: Trivial checkpatch fixup]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:33 +10:00
Nikunj A Dadhania
5f29cc8292 target-ppc: add maddhd and maddhdu instruction
maddhd: Multiply-Add High Doubleword
maddhdu: Multiply-Add High Doubleword Unsigned

Above two instruction are dual form and differ by 1 bit
(31st bit)

Multiplies two 64-bit registers (RA * RB), adds third register(RC) to
the result(quadword) and returns the higher dword in the target
register(RT).

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
aeeb044c7b target-ppc: add maddld instruction
maddld: Multiply-Add Low Doubleword

Multiplies two 64-bit registers (RA * RB), adds third register(RC) to
the result(quadword) and returns the lower dword in the target
register(RT).

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Vivek Andrew Sha
dc2ee038da target-ppc: add setb instruction
The CR number is provided in the opcode as - BFA (11:13)

Returns:
  -1 if bit 0 of CR field is set
   1 if bit 1 of CR field is set
   0 otherwise.

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
[ reworded commit, used 32bit ops as crf is 32bits ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
082ce33005 target-ppc: add cmpeqb instruction
Search a byte in the stream of 8bytes provided in the register

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
b35344e4a0 target-ppc: add cnttzw[.] instruction
Add ISA3.0: Count trailing zeros word instruction.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Sandipan Das
e91d95b277 target-ppc: add cnttzd[.] instruction
Add ISA3.0 Count trailing zeros double word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ added ISA300 flag ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
063cf14fe3 target-ppc: add modulo dword operations
Adding following instructions for ISA3.0 support

modud: Modulo Unsigned Dword
modsd: Modulo Signed Dword

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
af2c66200e target-ppc: add modulo word operations
Adding following instructions:

moduw: Modulo Unsigned Word
modsw: Modulo Signed Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
f2442ef93c target-ppc: add cmprb instruction
ISA 3.0 Compare Ranged Byte instruction useful for
isupper/islower/isaplha kind of operation.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
c5b2b9ce12 target-ppc: adding addpcis instruction
ISA 3.0 instruction for adding immediate value shifted with next
instruction address and return the result in the target register.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
eb640b13a3 target-ppc: Introduce POWER ISA 3.0 flag
This flag will be used for POWER9 instructions.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Aneesh Kumar K.V
706d64675a target-ppc: Introduce Power9 family
The patch adds CPU PVR definition for POWER9 and enables QEMU to launch
guests/linux-user in TCG mode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[ Added POWER9 alias, POWER9 SPAPR core and dropped MMU defines ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
[dwg: Dropped sPAPR core type again for now]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Cédric Le Goater
7804c353a9 hw/ppc: include fdt helper routine in a common file
spapr_pci would also be a good candidate but the macro _FDT is
slightly different. It returns and does not exit.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Greg Kurz
1b1746a436 xics_kvm: drop extra checking of kernel_xics_fd
We abort a few lines above if kernel_xics_fd == -1.

This is only code cleanup.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Peter Maydell
7faae0b36e Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160906-1' into staging
target-arm queue:
 * fix incorrect LPAE bit in FSR for alignment faults
 * ACPI: fix the AML ID format for CPU devices to work for
   large numbers of CPUs
 * ast2400: add memory controller device model
 * m25p80: fix the vmstate structure name (migration break)

# gpg: Signature made Tue 06 Sep 2016 20:02:28 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20160906-1:
  block: m25p80: Fix vmstate structure name
  ARM: ACPI: fix the AML ID format for CPU devices
  target-arm: Fix lpae bit in FSR on an alignment fault
  ast2400: add a memory controller device model

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 20:03:58 +01:00
Marcin Krzeminski
c827c06a4d block: m25p80: Fix vmstate structure name
Correct bad name of the vmstate structure. Since this breaks
compatibility also update vmstate version back to 0 and make
all fields independent of the VMState version.

Signed-off-by: Marcin Krzeminski <marcin.krzeminski@nokia.com>
Acked-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 1473146346-27337-1-git-send-email-marcin.krzeminski@nokia.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:18 +01:00
Wei Huang
f460be435f ARM: ACPI: fix the AML ID format for CPU devices
Current QEMU will stall guest VM booting under ACPI mode when vcpu count
is >= 12. Analyzing the booting log, it turns out that DSDT table can't
be loaded correctly due to "Invalid character(s) in name (0x62303043),
repaired: [C00*]". This is because existing QEMU uses a lower case AML
ID for CPU devices (e.g. C000, C001, ..., C00a, C00b). The ACPI code
inside guest VM detects this lower case character as an invalid character
(see acpi_ut_valid_acpi_char() in drivers/acpi/acpica/utstring.c file)
and converts it to "*". This causes duplicated IDs (i.e. "C00a" ==>"C00*"
and "C00b" ==> "C00*"). So ACPI refuses to load the table.

This patch fixes the problem by changing the format with a upper case
character. It matches the CPU ID formats used in other parts of QEMU
code.

Reported-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1472852809-23042-1-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:17 +01:00
Sergey Sorokin
e0fe723c24 target-arm: Fix lpae bit in FSR on an alignment fault
If an alignment fault occurred and target EL is using AArch32,
then DFSR/IFSR bit LPAE[9] must be set correctly.

Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
Message-id: 1471283293-169850-1-git-send-email-afarallax@yandex.ru
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:17 +01:00
Cédric Le Goater
c2da8a8b90 ast2400: add a memory controller device model
The uboot in the previous release of the SDK was using a hardcoded
value for memory size. This is not true anymore, the value is now
retrieved from the memory controller.

Below is a model for this device, only supporting unlock and
configuration. Without it, we endup running a guest with 64MB, which
is a bit low nowdays. It uses a 'silicon-rev' property and ram_size to
build a default value. Some bits should be linked to SCU strapping
registers but it seems a bit complex to add for the current need.

The model is ready for the AST2500 SOC.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:17 +01:00
Peter Maydell
2926375cff Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Tue 06 Sep 2016 11:38:01 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (36 commits)
  block: Allow node name for 'qemu-io' HMP command
  qemu-iotests: Log QMP traffic in debug mode
  block jobs: Improve error message for missing job ID
  coroutine: Assert that no locks are held on termination
  coroutine: Let CoMutex remember who holds it
  qcow2: fix iovec size at qcow2_co_pwritev_compressed
  test-coroutine: Fix coroutine pool corruption
  qemu-iotests: add vmdk for test backup compression in 055
  qemu-iotests: test backup compression in 055
  blockdev-backup: added support for data compression
  drive-backup: added support for data compression
  block: simplify blockdev-backup
  block: simplify drive-backup
  block/io: turn on dirty_bitmaps for the compressed writes
  block: remove BlockDriver.bdrv_write_compressed
  qcow: cleanup qcow_co_pwritev_compressed to avoid the recursion
  qcow: add qcow_co_pwritev_compressed
  vmdk: add vmdk_co_pwritev_compressed
  qcow2: cleanup qcow2_co_pwritev_compressed to avoid the recursion
  qcow2: add qcow2_co_pwritev_compressed
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 17:18:17 +01:00
Peter Maydell
f9ae6bcf1d Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160906-v2' into staging
First (big) chunk of s390x updates:
- cpumodel support for s390x
- various fixes and improvements

# gpg: Signature made Tue 06 Sep 2016 16:09:53 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160906-v2: (38 commits)
  s390x/cpumodel: implement QMP interface "query-cpu-model-baseline"
  s390x/cpumodel: implement QMP interface "query-cpu-model-comparison"
  s390x/cpumodel: implement QMP interface "query-cpu-model-expansion"
  qmp: add QMP interface "query-cpu-model-baseline"
  qmp: add QMP interface "query-cpu-model-comparison"
  qmp: add QMP interface "query-cpu-model-expansion"
  s390x/kvm: don't enable key wrapping if msa3 is disabled
  s390x/kvm: let the CPU model control CMM(A)
  s390x/kvm: disable host model for problematic compat machines
  s390x/kvm: implement CPU model support
  s390x/kvm: allow runtime-instrumentation for "none" machine
  s390x/sclp: propagate hmfai
  s390x/sclp: propagate the mha via sclp
  s390x/sclp: propagate the ibc val (lowest and unblocked ibc)
  s390x/sclp: indicate sclp features
  s390x/sclp: introduce sclp feature blocks
  s390x/sclp: factor out preparation of cpu entries
  s390x/cpumodel: check and apply the CPU model
  s390x/cpumodel: let the CPU model handle feature checks
  s390x/cpumodel: expose features and feature groups as properties
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 16:17:19 +01:00
David Hildenbrand
f1a47d08ef s390x/cpumodel: implement QMP interface "query-cpu-model-baseline"
Let's implement that interface by reusing our conversion code and
lookup code for CPU definitions.

In order to find a compatible CPU model, we first detect the maximum
possible CPU generation and then try to find a maximum model, satisfying
all base features (not exceeding the maximum generation).

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-31-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
4e82ef0502 s390x/cpumodel: implement QMP interface "query-cpu-model-comparison"
Let's implement that interface by reusing our convertion code implemented
for expansion.

We use CPU generations and CPU features to calculate the result. This
means, that a zEC12 cannot simply be converted into a z13 by stripping
of features. This is required, as other magic values (e.g. maximum
address sizes) belong to a CPU generation and cannot simply be
emulated by an older generation.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-30-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
137974cea3 s390x/cpumodel: implement QMP interface "query-cpu-model-expansion"
In order to expand CPU models, we create temporary cpus that handle the
feature/group parsing. Only CPU feature properties are expanded.

When converting the data structure back, we always fall back to the
static base CPU model, which is by definition migration-safe.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-29-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
b18b604334 qmp: add QMP interface "query-cpu-model-baseline"
Let's provide a standardized interface to baseline two CPU models, to
create a third, compatible one. This is especially helpful when two
CPU models are not identical, but a CPU model is required that is
guaranteed to run under both configurations, where the original models run.

"query-cpu-model-baseline" takes two CPU models and returns a third,
compatible model. The result will always be a static CPU model.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-28-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
0031e0d683 qmp: add QMP interface "query-cpu-model-comparison"
Let's provide a standardized interface to compare two CPU models.
"query-cpu-model-compare" takes two models and returns  how they compare
in a specific configuration.

The result will give guarantees about runnability. E.g. if a CPU model A
is a subset of CPU model B, model A is guaranteed to run in configurations
where model B runs, but not the other way around (might or might not run).

Usually, CPU features or CPU generations are used to calculate the result.
If a model is not guaranteed to run in a certain environment (e.g.
incompatible), a  compatible one can be created by "baselining" both models
(follow up patch).

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-27-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
e09484efbc qmp: add QMP interface "query-cpu-model-expansion"
Let's provide a standardized interface to expand CPU models. This interface
can be used by tooling to get details about a specific CPU model in a
certain configuration, e.g. about the "host" model.

To take care of all architectures, two detail levels for an expansion
are introduced. Certain architectures might not support all detail levels.
While "full" will expand and indicate all relevant properties/features
of a CPU model, "static" expands to a static base CPU model, that will
never change between QEMU versions and therefore have the same features
when used under different compatibility machines.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-26-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
c85d21c73b s390x/kvm: don't enable key wrapping if msa3 is disabled
As the CPU model now controls msa3, trying to set wrapping keys without
msa3 being around/enable in the kernel will produce misleading errors.

So let's simply not configure key wrapping if msa3 is not enabled and
make compat machines with disabled CPU model work correctly.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-25-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
07059effd1 s390x/kvm: let the CPU model control CMM(A)
Starting with recent kernels, if the cmma attributes are available, we
actually have hardware support. Enabling CMMA then means providing the
guest VCPU with CMM, therefore enabling its CMM facility.

Let's not blindly enable CMM anymore but let's control it using CPU models.
For disabled CPU models, CMMA will continue to always get enabled.

Also enable it in the applicable default models.

Please note that CMM doesn't work with hugetlbfs, therefore we will
warn the user and keep it disabled. Migrating from/to a hugetlbfs
configuration works, as it will be disabled on both sides.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-24-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
34821036cd s390x/kvm: disable host model for problematic compat machines
Compatibility machines that touch runtime-instrumentation should not
be used with the CPU model. Otherwise the host model will look different,
depending on the QEMU machine QEMU has been started with.

So let's simply disable the host model for existing compatibility machines
that all disable ri. This, in return, disables the CPU model for these
compat machines completely.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-23-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
3b84c25cc7 s390x/kvm: implement CPU model support
Let's implement our two hooks so we can support CPU models.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-22-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
392529cb77 s390x/kvm: allow runtime-instrumentation for "none" machine
To be able to query the correct host model for the "none" machine,
let's allow runtime-instrumentation for that machine.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-21-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
a366930780 s390x/sclp: propagate hmfai
hmfai is provided on CPU models >= z196. Let's propagate it properly.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-19-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
3fad3252a3 s390x/sclp: propagate the mha via sclp
The mha is provided in the CPU model, so get any CPU and extract the value.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-18-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
059be520d5 s390x/sclp: propagate the ibc val (lowest and unblocked ibc)
If we have a lowest ibc, we can indicate the ibc to the guest.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-17-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
4dd4200ee7 s390x/sclp: indicate sclp features
We have three different blocks in the SCLP read-SCP information response
that indicate sclp features. Let's prepare propagation.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-16-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
1c07e01b61 s390x/sclp: introduce sclp feature blocks
The sclp "read cpu info" and "read scp info" commands can include
features for the cpu info and configuration characteristics (extended),
decribing some advanced features available in the configuration.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-15-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
026546e6c3 s390x/sclp: factor out preparation of cpu entries
Let's factor out the common code of "read cpu info" and "read scp
info". This will make the introduction of new cpu entry fields easier.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-14-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
80560137cf s390x/cpumodel: check and apply the CPU model
We have to test if a configured CPU model is runnable in the current
configuration, and if not report why that is the case. This is done by
comparing it to the maximum supported model (host for KVM or z900 for TCG).
Also, we want to do some base sanity checking for a configured CPU model.

We'll cache the maximum model and the applied model (for performance
reasons and because KVM can only be configured before any VCPU is created).

For unavailable "host" model, we have to make sure that we inform KVM,
so it can do some compatibility stuff (enable CMMA later on to be precise).

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-13-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
7c72ac49ae s390x/cpumodel: let the CPU model handle feature checks
If we have certain features enabled, we have to migrate additional state
(e.g. vector registers or runtime-instrumentation registers). Let the
CPU model control that unless we have no "host" CPU model in the KVM
case. This will later on be the case for compatibility machines, so
migration from QEMU versions without the CPU model will still work.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-12-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
0754f60429 s390x/cpumodel: expose features and feature groups as properties
Let's add all features and feature groups as properties to all CPU models.
If the "host" CPU model is unknown, we can neither query nor change
features. KVM will just continue to work like it did until now.

We will not allow to enable features that were not part of the original
CPU model, because that could collide with the IBC in KVM.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-11-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
ad5afd07b6 s390x/cpumodel: store the CPU model in the CPU instance
A CPU model consists of a CPU definition, to which delta changes are
applied - features added or removed (e.g. z13-base,vx=on). In addition,
certain properties (e.g. cpu id) can later on change during migration
but belong into the CPU model. This data will later be filled from the
host model in the KVM case.

Therefore, store the configured CPU model inside the CPU instance, so
we can later on perform delta changes using properties.

For the "qemu" model, we emulate in TCG a z900. "host" will be
uninitialized (cpu->model == NULL) unless we have CPU model support in KVM
later on. The other models are all initialized from their definitions.
Only the "host" model can have a cpu->model == NULL.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-10-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
6c064de1e0 s390x/cpumodel: register defined CPU models as subclasses
This patch adds the CPU model definitions that are known on s390x -
like z900, zBC12 or z13. For each definition, introduce two CPU models:

1. Base model (e.g. z13-base): Minimum feature set we expect to be around
   on all z13 systems. These models are migration-safe and will never
   change.
2. Flexible models (e.g. z13): Models that can change between QEMU versions
   and will be extended over time as we implement further features that
   are already part of such a model in real hardware of certain
   configurations.

We want to work on features using ordinary bitmap operations, however we
can't initialize a bitmap statically (unsigned long[] ...). Therefore we
store the generated feature lists in separate arrays and convert them to
proper bitmaps before registering all our CPU model classes.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-9-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
8b3d6cb1fa s390x/cpumodel: introduce CPU feature group definitions
Let's use the generated groups to create feature group representations for
the user. These groups can later be used to enable/disable multiple
features in one shot and will be used to reduce the amount of reported
features to the user if all subfeatures are in place.

We want to work on features using ordinary bitmap operations, however we
can't initialize a bitmap statically (unsigned long[] ...). Therefore
we store the generated feature lists in separate arrays and convert
them to a proper bitmaps before they will ever be used.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-8-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
90229ebbad s390x/cpumodel: generate CPU feature group lists
Feature groups will be very helpful to reduce the amount of features
typically available in sane configurations. E.g. the MSA facilities
introduced loads of subfunctions, which could - in theory - go away
in the future, but we want to avoid reporting hundrets of features to
the user if usually all of them are in place.

Groups only contain features that were introduced in one shot, not just
random features. Therefore, groups can never change. This is an important
property regarding migration.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-7-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
Michael Mueller
dced7eec3c s390x/cpumodel: generate CPU feature lists for CPU models
This patch introduces the helper "gen-features" which allows to generate
feature list definitions at compile time. Its flexibility is better and the
error-proneness is lower when compared to static programming time added
statements.

The helper includes "target-s390x/cpu_features.h" to be able to use named
facility bits instead of numbers. The generated defines will be used for
the definition of CPU models.

We generate feature lists for each HW generation and GA for EC models. BC
models are always based on a EC version and have no separate definitions.

Base features: Features we expect to be always available in sane setups.
Migration safe - will never change. Can be seen as "minimum features
required for a CPU model".

Default features: Features we expect to be stable and around in latest
setups (e.g. having KVM support) - not migration safe.

Max features: All supported features that are theoretically allowed for a
CPU model. Exceeding these features could otherwise produce problems with
IBC (instruction blocking controls) in KVM.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
[generate base, default and models. renaming and cleanup]
Message-Id: <20160905085244.99980-6-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
Michael Mueller
7824174462 s390x/cpumodel: introduce CPU features
The patch introduces s390x CPU features (most of them refered to as
facilities) along with their discription and some functions that will be
helpful when working with the features later on.

Please note that we don't introduce all known CPU features, only the
ones currently supported by KVM + QEMU. We don't want to enable later
on blindly any facilities, for which we don't know yet if we need QEMU
support to properly support them (e.g. migrate additional state when
active). We can update QEMU later on.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
[reworked to include non-stfle features, added definitions]
Message-Id: <20160905085244.99980-5-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
6efadc9050 s390x/cpumodel: expose CPU class properties
Let's expose the description and migration safety and whether a definition
is static, as class properties, this can be helpful in the future.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-4-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
41868f846d s390x/cpumodel: "host" and "qemu" as CPU subclasses
This patch introduces two CPU models, "host" and "qemu".
"qemu" is used as default when running under TCG. "host" is used
as default when running under KVM. "host" cannot be used without KVM.
"host" is not migration-safe. They both inherit from the base s390x CPU,
which is turned into an abstract class.

This patch also changes CPU creation to take care of the passed CPU string
and reuses common code parse_features() function for that purpose. Unknown
CPU definitions are now reported. The "-cpu ?" and "query-cpu-definition"
commands are changed to list all CPU subclasses automatically, including
migration-safety and whether static.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-3-dahi@linux.vnet.ibm.com>
[CH: fix up self-assignments in s390_cpu_list, as spotted by clang]
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
Peter Maydell
085c915019 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 and memory backends queue, 2016-09-05

This includes a few features that were submitted just after hard
freeze, and a bug fix for memory backend initialization ordering.

# gpg: Signature made Mon 05 Sep 2016 20:50:14 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  vl: Delay initialization of memory backends
  vhost-user-test: Use libqos instead of pxe-virtio.rom
  target-i386: Add more Intel AVX-512 instructions support
  exec: Ensure the only one cpu_index allocation method is used

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 13:33:17 +01:00
Peter Maydell
30e7d092b2 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Mon 05 Sep 2016 20:41:04 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace: Avoid implicit bool->integer conversions
  trace: Remove 'trace_events_dstate_init'
  trace: add syslog tracing backend

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 12:41:24 +01:00
Peter Maydell
1fd66154fd Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20160905' into staging
Xen 2016/09/05

# gpg: Signature made Mon 05 Sep 2016 19:59:47 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20160905:
  xen: use native disk xenbus protocol if possible

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 11:43:18 +01:00
Eduardo Habkost
6546d0dba6 vl: Delay initialization of memory backends
Initialization of memory backends may take a while when
prealloc=yes is used, depending on their size. Initializing
memory backends before chardevs may delay the creation of monitor
sockets, and trigger timeouts on management software that waits
until the monitor socket is created by QEMU. See, for example,
the bug report at:
https://bugzilla.redhat.com/show_bug.cgi?id=1371211

In addition to that, allocating memory before calling
configure_accelerator() breaks the tcg_enabled() checks at
memory_region_init_*().

This patch fixes those problems by adding "memory-backend-*"
classes to the delayed-initialization list.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:03:47 -03:00
Eduardo Habkost
cdafe92961 vhost-user-test: Use libqos instead of pxe-virtio.rom
vhost-user-test relies on iPXE just to initialize the virtio-net
device, and doesn't do any actual packet tx/rx testing.

In addition to that, the test relies on TCG, which is
imcompatible with vhost. The test only worked by accident: a bug
the memory backend initialization made memory regions not have
the DIRTY_MEMORY_CODE bit set in dirty_log_mask.

This changes vhost-user-test to initialize the virtio-net device
using libqos, and not use TCG nor pxe-virtio.rom.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:03:47 -03:00
Luwei Kang
cc728d1493 target-i386: Add more Intel AVX-512 instructions support
Add more AVX512 feature bits, include AVX512DQ, AVX512IFMA,
AVX512BW, AVX512VL, AVX512VBMI. Its spec can be found at:
https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:01:55 -03:00
Igor Mammedov
630eb0faf4 exec: Ensure the only one cpu_index allocation method is used
Make sure that cpu_index auto allocation isn't used in
combination with manual cpu_index assignment. And
dissallow out of order cpu removal if auto allocation
is in use.

Target that wishes to support out of order unplug should
switch to manual cpu_index assignment. Following patch
could be used as an example:
 (pc: init CPUState->cpu_index with index in possible_cpus[]))

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:01:55 -03:00
Lluís Vilanova
8eb1b9db55 trace: Avoid implicit bool->integer conversions
An explicit if/else is clearer than arithmetic assuming #true is 1,
while the compiler should be able to generate just as optimal code.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 147194273830.26836.5875729707953474838.stgit@fimbulvetr.bsc.es
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 13:47:02 -04:00
Lluís Vilanova
a4d50b1d2a trace: Remove 'trace_events_dstate_init'
Removes the event state array used for early initialization. Since only
events with the "vcpu" property need a late initialization fixup,
threats their initialization specially.

Assumes that the user won't touch the state of "vcpu" events between
early and late initialization (e.g., through QMP).

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 147194273191.26836.14423079546263831356.stgit@fimbulvetr.bsc.es
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 13:47:01 -04:00
Paul Durrant
0a85241756 trace: add syslog tracing backend
This patch adds a tracing backend which sends output using syslog().
The syslog backend is limited to POSIX compliant systems.

openlog() is called with facility set to LOG_DAEMON, with the LOG_PID
option. Trace events are logged at level LOG_INFO.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Message-id: 1470318254-29989-1-git-send-email-paul.durrant@citrix.com
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 13:47:01 -04:00
Kevin Wolf
e7f98f2f92 block: Allow node name for 'qemu-io' HMP command
When using a node name, create a temporary BlockBackend that is used to
run the qemu-io command.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
c0088d79a7 qemu-iotests: Log QMP traffic in debug mode
Python tests are already annoying enough to debug. With QMP traffic
available it's a little bit easier at least.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
1562047c89 block jobs: Improve error message for missing job ID
If a block job is started with a node name rather than a device name and
no explicit job ID is passed, it was reported that '' isn't a
well-formed ID. Which is correct, but we can make the message a little
bit nicer.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
1b7f01d966 coroutine: Assert that no locks are held on termination
A coroutine that takes a lock must also release it again. If the
coroutine terminates without having released all its locks, it's buggy
and we'll probably run into a deadlock sooner or later. Make sure that
we don't get such cases.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
0e438cdc93 coroutine: Let CoMutex remember who holds it
In cases of deadlocks, knowing who holds a given CoMutex is really
helpful for debugging. Keeping the information around doesn't cost much
and allows us to add another assertion to keep the code correct, so
let's just add it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
8b2bd09338 qcow2: fix iovec size at qcow2_co_pwritev_compressed
Use bytes as the size would be more exact than s->cluster_size.  Although
qemu_iovec_to_buf() will not allow to go beyond the qiov.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
980e66216f test-coroutine: Fix coroutine pool corruption
The test case overwrites the Coroutine object with 0xff as a way to
assert that the coroutine isn't used any more. However, this means that
the coroutine pool now contains a corrupted object and later test cases
may get this corrupted object and crash.

This patch saves the real content of the object and restores it after
completing the test. The only use of the coroutine pool between those
two points is the deletion of co2. As this only means an insertion at
the head of an SLIST (release_pool or alloc_pool), it doesn't access the
invalid list pointers that co1 has during this period.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
00198ecc77 qemu-iotests: add vmdk for test backup compression in 055
The vmdk format has support for compression, it would be fine to add it for
the test backup compression

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
e1b5c51f4c qemu-iotests: test backup compression in 055
Added cases to check the backup compression out of qcow2, raw in qcow2
on drive-backup and blockdev-backup.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
3b7b123659 blockdev-backup: added support for data compression
The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
13b9414b57 drive-backup: added support for data compression
The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

The patch adds a flag to the qmp/hmp drive-backup command which enables
block compression. Compression should be implemented in the format driver
to enable this feature.

There are some limitations of the format driver to allow compressed writes.
We can write data only once. Though for backup this is perfectly fine.
These limitations are maintained by the driver and the error will be
reported if we are doing something wrong.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
dc7a4a9ed1 block: simplify blockdev-backup
Now that we can support boxed commands, use it to greatly reduce the
number of parameters (and likelihood of getting out of sync) when
adjusting blockdev-backup parameters.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
81206a8987 block: simplify drive-backup
Now that we can support boxed commands, use it to greatly reduce the
number of parameters (and likelihood of getting out of sync) when
adjusting drive-backup parameters.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
3ea1a09111 block/io: turn on dirty_bitmaps for the compressed writes
Previously was added the assert:

  commit 1755da16e3
  Author: Paolo Bonzini <pbonzini@redhat.com>
  Date:   Thu Oct 18 16:49:18 2012 +0200
  block: introduce new dirty bitmap functionality

Now the compressed write is always in coroutine and setting the bits is
done after the write, so that we can return the dirty_bitmaps for the
compressed writes.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
35fadca80e block: remove BlockDriver.bdrv_write_compressed
There are no block drivers left that implement the old
.bdrv_write_compressed interface, so it can be removed. Also now we have
no need to use the bdrv_pwrite_compressed function and we can remove it
entirely.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
655923df4b qcow: cleanup qcow_co_pwritev_compressed to avoid the recursion
Now that the function uses a vector instead of a buffer, there is no
need to use recursive code.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
f2b95a1231 qcow: add qcow_co_pwritev_compressed
Added implementation of the qcow_co_pwritev_compressed function that
will allow us to safely use compressed writes for the qcow from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
b2c622d365 vmdk: add vmdk_co_pwritev_compressed
Added implementation of the vmdk_co_pwritev_compressed function that
will allow us to safely use compressed writes for the vmdk from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
a2c0ca6f55 qcow2: cleanup qcow2_co_pwritev_compressed to avoid the recursion
Now that the function uses a vector instead of a buffer, there is no
need to use recursive code.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
fcccefc57f qcow2: add qcow2_co_pwritev_compressed
Added implementation of the qcow2_co_pwritev_compressed function that
will allow us to safely use compressed writes for the qcow2 from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
29a298af9d block/io: reuse bdrv_co_pwritev() for write compressed
For bdrv_pwrite_compressed() it looks like most of the code creating
coroutine is duplicated in bdrv_prwv_co(). So we can just add a flag
(BDRV_REQ_WRITE_COMPRESSED) and use bdrv_prwv_co() as a generic one.
In the end we get coroutine oriented function for write compressed by using
bdrv_co_pwritev/blk_co_pwritev with BDRV_REQ_WRITE_COMPRESSED flag.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
751e2f0698 block: Convert bdrv_pwrite_compressed() to BdrvChild
Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:47 +02:00
Pavel Butsykin
fe5c1355e7 block: switch blk_write_compressed() to byte-based interface
This is a preparatory patch, which continues the general trend of the
transition to the byte-based interfaces. bdrv_check_request() and
blk_check_request() are no longer used, thus we can remove them.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
094138d09e nbd-server: Allow node name for nbd-server-add
There is no reason why an NBD server couldn't be started for any node,
even if it's not on the top level. This converts nbd-server-add to
accept a node-name.

Note that there is a semantic difference between using a BlockBackend
name and the node name of its root: In the former case, the NBD server
is closed on eject; in the latter case, the NBD server doesn't drop its
reference and keeps the image file open this way.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
cd7fca952c nbd-server: Use a separate BlockBackend
The builtin NBD server uses its own BlockBackend now instead of reusing
the monitor/guest device one.

This means that it has its own writethrough setting now. The builtin
NBD server always uses writeback caching now regardless of whether the
guest device has WCE enabled. qemu-nbd respects the cache mode given on
the command line.

We still need to keep a reference to the monitor BB because we put an
eject notifier on it, but we don't use it for any I/O.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
0524e93a3f block: Accept node-name for drive-mirror
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
drive-mirror to accept a node-name without lifting the restriction that
we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
b7e4fa2242 block: Accept node-name for drive-backup
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
drive-backup and the corresponding transaction action to accept a
node-name without lifting the restriction that we're operating at a root
node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
7b5dca3f02 block: Accept node-name for change-backing-file
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
change-backing-file to accept a node-name without lifting the
restriction that we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
75dfd402a7 block: Accept node-name for blockdev-snapshot-internal-sync
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-snapshot-internal-sync to accept a node-name without lifting
the restriction that we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
2dfb4c033f block: Accept node-name for blockdev-snapshot-delete-internal-sync
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-snapshot-delete-internal-sync to accept a node-name without
lifting the restriction that we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
07eec65272 block: Accept node-name for blockdev-mirror
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-mirror to accept a node-name without lifting the restriction
that we're operating at a root node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
cef34eebf3 block: Accept node-name for blockdev-backup
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-backup and the corresponding transaction action to accept a
node-name without lifting the restriction that we're operating at a root
node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
1d13b167fd block: Accept node-name for block-commit
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
block-commit to accept a node-name without lifting the restriction that
we're operating at a root node.

As libvirt makes use of the DeviceNotFound error class, we must add
explicit code to retain this behaviour because qmp_get_root_bs() only
returns GenericErrors.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
b6c1bae5df block: Accept node-name for block-stream
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
block-stream to accept a node-name without lifting the restriction that
we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
9ef6e505f0 scsi: scsi-cd without drive property for empty drive
This allows the creation of an empty scsi-cd device without manually
creating a BlockBackend.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
67c75f3dff ide: ide-cd without drive property for empty drive
This allows the creation of an empty ide-cd device without manually
creating a BlockBackend.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:47 +02:00
David Hildenbrand
fc4b84b1c6 qmp: details about CPU definitions in query-cpu-definitions
It might be of interest for tooling whether a CPU definition can be safely
used when migrating, or if e.g. CPU features might get lost during
migration when migrationg from/to a different QEMU version or host, even if
the same compatibility machine is used.

Also, we want to know if a CPU definition is static and will never change.
Beause these definitions can then be used independantly of a compatibility
machine and will always have the same feature set, they can e.g. be used
to indicate the "host" model in libvirt later on.

Let's add two return values to query-cpu-definitions, stating for each
returned CPU definition, if it is migration-safe and if it is static.

While "migration-safe" is optional, "static" will be set to "false"
automatically by all implementing architectures. If a model really was
static all the time and will be in the future, this can simply be changed
later.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-2-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:42:36 +02:00
David Hildenbrand
b60fae32ff s390x/kvm: 2 byte software breakpoint support
Diag 501 (4 bytes) was used until now for software breakpoints on s390.
As instructions on s390 might be 2 bytes long, temporarily overwriting them
with 4 bytes is evil and can result in very strange guest behaviour.

We make use of invalid instruction 0x0000 as new sw breakpoint instruction.
We have to enable interception of that instruction in KVM using a
capability.

If no software breakpoint has been inserted at the reported position, an
operation exception has to be injected into the guest. Otherwise a
breakpoint has been hit and the pc has to be rewound.

If KVM doesn't yet support interception of instruction 0x0000 the
existing mechanism exploiting diag 501 is used. To keep overhead low,
interception of instruction 0x0000 will only be enabled if sw breakpoints
are really used.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
dbdfea9226 linux-headers: update
Update headers against 4.8-rc2.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
882b3b9769 s390x/css: handle cssid 255 correctly
The cssid 255 is reserved but still valid from an architectural
point of view. However, feeding a bogus schid of 0xffffffff into
the virtio hypercall will lead to a crash:

Stack trace of thread 138363:
        #0  0x00000000100d168c css_find_subch (qemu-system-s390x)
        #1  0x00000000100d3290 virtio_ccw_hcall_notify
        #2  0x00000000100cbf60 s390_virtio_hypercall
        #3  0x000000001010ff7a handle_hypercall
        #4  0x0000000010079ed4 kvm_cpu_exec (qemu-system-s390x)
        #5  0x00000000100609b4 qemu_kvm_cpu_thread_fn
        #6  0x000003ff8b887bb4 start_thread (libpthread.so.0)
        #7  0x000003ff8b78df0a thread_start (libc.so.6)

This is because the css array was only allocated for 0..254
instead of 0..255.

Let's fix this by bumping MAX_CSSID to 255 and fencing off the
reserved cssid of 255 during css image allocation.

Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
5759db1936 s390x/ioinst: advertise fcs facility
As we provide format 1 chsc scpd data (and don't support any ficon
channels), we de facto already have the ficon-cascaded-switch
facility.

Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
f2cab7f148 s390x: wrap flic savevm calls into vmstate
Just a simple conversion to get rid of register_savevm.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Christian Borntraeger
989fd865f5 s390/sclp: cache the sclp device
With the current code a simple sclp command takes about 13000 ns
The biggest part seems to be the resolver of the object model. By
caching the sclp device the time for an sclp command goes down to
2500ns. Talking about real life scenarios, this change doubles
the speed of the sclp console when sending single bytes outputs
to /dev/console.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Yi Min Zhao
0c2a16a4dc s390x/pci: assert zpci always existing
If one pci device is plugged successfully, there must be a zpci device
existing. This means that during hot-unplugging a pci device, its
corresponding zpci device must be found. Therefore we use an assert to
replace current code.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Yi Min Zhao
0d36d79192 s390x/pci: return directly if create zpci failed
In the case that zpci is automatically created, we did not return
immediately on failure, which would lead to NULL pointer dereferencing.
Let's fix it.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
61823988df s390x: add compat machine for 2.8
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Peter Maydell
e87d397e5e Open 2.8 development tree
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-05 11:38:54 +01:00
Peter Maydell
1dc33ed90b Update version for v2.7.0 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-02 13:44:11 +01:00
Juergen Gross
4ada797b05 xen: use native disk xenbus protocol if possible
The qdisk implementation is using the native xenbus protocol only in
case of no protocol specified at all. As using the explicit 32- or
64-bit protocol is slower than the native one due to copying requests
not by memcpy but element for element, this is not optimal.

Correct this by using the native protocol in case word sizes of
frontend and backend match.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2016-08-30 15:01:01 -07:00
Peter Maydell
12d2c4184c Update version for v2.7.0-rc5 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 20:39:45 +01:00
Greg Kurz
56f101ecce 9pfs: handle walk of ".." in the root directory
The 9P spec at http://man.cat-v.org/plan_9/5/intro says:

All directories must support walks to the directory .. (dot-dot) meaning
parent directory, although by convention directories contain no explicit
entry for .. or . (dot).  The parent of the root directory of a server's
tree is itself.

This means that a client cannot walk further than the root directory
exported by the server. In other words, if the client wants to walk
"/.." or "/foo/../..", the server should answer like the request was
to walk "/".

This patch just does that:
- we cache the QID of the root directory at attach time
- during the walk we compare the QID of each path component with the root
  QID to detect if we're in a "/.." situation
- if so, we skip the current component and go to the next one

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 19:23:00 +01:00
Greg Kurz
805b5d98c6 9pfs: forbid . and .. in file names
According to the 9P spec http://man.cat-v.org/plan_9/5/open about the
create request:

The names . and .. are special; it is illegal to create files with these
names.

This patch causes the create and lcreate requests to fail with EINVAL if
the file name is either "." or "..".

Even if it isn't explicitly written in the spec, this patch extends the
checking to all requests that may cause a directory entry to be created:

    - mknod
    - rename
    - renameat
    - mkdir
    - link
    - symlink

The unlinkat request also gets patched for consistency (even if
rmdir("foo/..") is expected to fail according to POSIX.1-2001).

The various error values come from the linux manual pages.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 19:21:56 +01:00
Greg Kurz
fff39a7ad0 9pfs: forbid illegal path names
Empty path components don't make sense for most commands and may cause
undefined behavior, depending on the backend.

Also, the walk request described in the 9P spec [1] clearly shows that
the client is supposed to send individual path components: the official
linux client never sends portions of path containing the / character for
example.

Moreover, the 9P spec [2] also states that a system can decide to restrict
the set of supported characters used in path components, with an explicit
mention "to remove slashes from name components".

This patch introduces a new name_is_illegal() helper that checks the
names sent by the client are not empty and don't contain unwanted chars.
Since 9pfs is only supported on linux hosts, only the / character is
checked at the moment. When support for other hosts (AKA. win32) is added,
other chars may need to be blacklisted as well.

If a client sends an illegal path component, the request will fail and
ENOENT is returned to the client.

[1] http://man.cat-v.org/plan_9/5/walk
[2] http://man.cat-v.org/plan_9/5/intro

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 19:21:39 +01:00
Peter Maydell
2b294f6b65 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* pc-bios/optionrom/Makefile fix for -O0
* revert socket_connect change

# gpg: Signature made Tue 30 Aug 2016 15:36:59 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  optionrom: cope with multiple -O options
  Revert "Change net/socket.c to use socket_*() functions"

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 17:11:14 +01:00
Paolo Bonzini
336d5881a9 optionrom: cope with multiple -O options
Reproducer:

    CFLAGS="-g3 -O0" ./configure --target-list=aarch64-softmmu,arm-softmmu --enable-vhost-net --enable-virtfs

Here CFLAGS ends up with "-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 ... -g3 -O0"
and pc-bios/optionrom/Makefile forgets to add the -O2 it needs.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-30 16:28:46 +02:00
Paolo Bonzini
616018352c Revert "Change net/socket.c to use socket_*() functions"
Since commit 7e8449594c, the socket connect code is blocking, because
calling socket_connect() without callback is blocking.  This reverts the
commit.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-30 16:28:46 +02:00
Christian Borntraeger
135a972b45 translate: early exit in tb_flush if there is no tcg
tb_flush does all kind of things, which are very tcg specific. As it
is called from some places even for KVM (e.g. gdb server) it is better
to detect these cases and do an early exit.
This also fixes a crash in the gdb server that was triggered by
commit 909eaac9bb ("tb hash: track translated blocks with qht").

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Reported-by: Richard Henderson <rth@twiddle.net>
Reported-by: Brent Baccala <cosine@freesoft.org>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-id: 1472148686-39841-1-git-send-email-borntraeger@de.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 13:30:55 +01:00
Daniel P. Berrange
b69a553b4a ui: fix refresh of VNC server surface
In previous commit

  commit c7628bff41
  Author: Gerd Hoffmann <kraxel@redhat.com>
  Date:   Fri Oct 30 12:10:09 2015 +0100

    vnc: only alloc server surface with clients connected

the VNC server was changed so that the 'vd->server' pixman
image was only allocated when a client is connected.

Since then if a client disconnects and then reconnects to
the VNC server all they will see is a black screen until
they do something that triggers a refresh. On a graphical
desktop this is not often noticed since there's many things
going on which cause a refresh. On a plain text console it
is really obvious since nothing refreshes frequently.

The problem is that the VNC server didn't update the guest
dirty bitmap, so still believes its server image is in sync
with the guest contents.

To fix this we must explicitly mark the entire guest desktop
as dirty after re-creating the server surface. Move this
logic into vnc_update_server_surface() so it is guaranteed
to be call in all code paths that re-create the surface
instead of only in vnc_dpy_switch()

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Peter Lieven <pl@kamp.de>
Tested-by: Peter Lieven <pl@kamp.de>
Message-id: 1471365032-18096-1-git-send-email-berrange@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 11:20:24 +01:00
Peter Maydell
e00da552a0 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio: fixes

some bugfixes for virtio
balloon is still broken wrt migration

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 23 Aug 2016 17:33:11 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  virtio: decrement vq->inuse in virtqueue_discard()
  virtio: recalculate vq->inuse after migration

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-24 17:21:03 +01:00
Ed Maste
8c1c230a6e Fix bsd-user build after d915b7bb
Must include "qemu-version.h" for the QEMU_PKGVERSION definition.

Signed-off-by: Ed Maste <emaste@freebsd.org>
Message-id: 1471877833-52343-1-git-send-email-emaste@freebsd.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-24 16:42:40 +01:00
Stefan Hajnoczi
58a83c6149 virtio: decrement vq->inuse in virtqueue_discard()
virtqueue_discard() moves vq->last_avail_idx back so the element can be
popped again.  It's necessary to decrement vq->inuse to avoid "leaking"
the element count.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-23 19:20:24 +03:00
Stefan Hajnoczi
bccdef6b1a virtio: recalculate vq->inuse after migration
The vq->inuse field is not migrated.  Many devices don't hold
VirtQueueElements across migration so it doesn't matter that vq->inuse
starts at 0 on the destination QEMU.

At least virtio-serial, virtio-blk, and virtio-balloon migrate while
holding VirtQueueElements.  For these devices we need to recalculate
vq->inuse upon load so the value is correct.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-23 19:20:10 +03:00
Peter Maydell
d75aa4372f Update version for v2.7.0-rc4 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-22 15:29:41 +01:00
Peter Maydell
62680fad7f Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Mon 22 Aug 2016 09:06:32 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  e1000e: remove internal interrupt flag
  slirp: fix segv when init failed

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-22 10:02:28 +01:00
Cao jin
e0af5a0e8b e1000e: remove internal interrupt flag
Commit 66bf7d58 removed internal msi state flag E1000E_USE_MSI, E1000E_USE_MSIX
is not necessary too, remove it now. And interrupt flag field intr_state also
can be removed now.

CC: Dmitry Fleytman <dmitry@daynix.com>
CC: Jason Wang <jasowang@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-22 16:06:08 +08:00
Marc-André Lureau
67f3280c06 slirp: fix segv when init failed
Since commit f6c2e66ae8, slirp uses an exit notifier to call
slirp_smb_cleanup. However, if init() failed, the notifier isn't added,
and removing it will fail:

==18447== Invalid write of size 8
==18447==    at 0x7EF2B5: notifier_remove (notify.c:32)
==18447==    by 0x48E80C: qemu_remove_exit_notifier (vl.c:2661)
==18447==    by 0x6A2187: net_slirp_cleanup (slirp.c:134)
==18447==    by 0x69419D: qemu_cleanup_net_client (net.c:338)
==18447==    by 0x69445B: qemu_del_net_client (net.c:401)
==18447==    by 0x6A2B81: net_slirp_init (slirp.c:366)
==18447==    by 0x6A4241: net_init_slirp (slirp.c:865)
==18447==    by 0x695C6D: net_client_init1 (net.c:1051)
==18447==    by 0x695F6E: net_client_init (net.c:1108)
==18447==    by 0x696DBA: net_init_netdev (net.c:1498)
==18447==    by 0x7F1F99: qemu_opts_foreach (qemu-option.c:1116)
==18447==    by 0x696E60: net_init_clients (net.c:1516)
==18447==  Address 0x0 is not stack'd, malloc'd or (recently) free'd

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-22 15:20:32 +08:00
Sascha Silbe
5f9f818ea8 test-logging: don't hard-code paths in /tmp
Since f6880b7f [qemu-log: support simple pid substitution for logs],
test-logging creates files with hard-coded names in /tmp. In the best
case, this prevents multiple developers from running "make check" on
the same machine. In the worst case, it allows for symlink attacks,
enabling an attacker to overwrite files that are writable to the
developer running "make check".

Instead of hard-coding the paths, create a temporary directory using
g_dir_make_tmp() and clean it up afterwards.

Fixes: f6880b7f ("qemu-log: support simple pid substitution for logs")
Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-id: 1471545963-11720-3-git-send-email-silbe@linux.vnet.ibm.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 12:44:11 +01:00
Sascha Silbe
5045570009 glib: add compatibility implementation for g_dir_make_tmp()
We're going to make use of g_dir_make_tmp() in test-logging. Provide a
compatibility implementation of it for glib < 2.30.

May behave differently in some edge cases (e.g. pattern only at the
end of the template, the file name is not part of the error message),
but good enough in practice.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-id: 1471545963-11720-2-git-send-email-silbe@linux.vnet.ibm.com
[PMM: removed variable "template" which caused compilation failures
 when C++ files include glib-compat.h]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 12:42:40 +01:00
Michal Privoznik
60c6b790fc syscall.c: Redefine IFLA_* enums
In 9c37146782 I've tried to fix a broken build with older
linux-headers. However, I didn't do it properly. The solution
implemented here is to grab the enums that caused the problem
initially, and rename their values so that they are "QEMU_"
prefixed. In order to guarantee matching values with actual
enums from linux-headers, the enums are seeded with starting
values from the original enums.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-id: 75c14d6e8a97c4ff3931d69c13eab7376968d8b4.1471593869.git.mprivozn@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 09:47:51 +01:00
Michal Privoznik
aee5f8f98e Revert "syscall.c: Fix build with older linux-headers"
The fix I've made there was wrong. I mean, basically what I did
there was equivalent to:

  #if 0
  some code;
  #endif

This reverts commit 9c37146782.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-id: 40d61349e445c1ad5fef795da704bf7ed6e19c86.1471593869.git.mprivozn@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 09:47:51 +01:00
Peter Maydell
02b1ad881c Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu 18 Aug 2016 14:39:31 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  block: fix possible reorder of flush operations
  block: fix deadlock in bdrv_co_flush

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-18 14:42:51 +01:00
Denis V. Lunev
156af3ac98 block: fix possible reorder of flush operations
This patch reduce CPU usage of flush operations a bit. When we have one
flush completed we should kick only next operation. We should not start
all pending operations in the hope that they will go back to wait on
wait_queue.

Also there is a technical possibility that requests will get reordered
with the previous approach. After wakeup all requests are removed from
the wait queue. They become active and they are processed one-by-one
adding to the wait queue in the same order. Though new flush can arrive
while all requests are not put into the queue.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Message-id: 1471457214-3994-3-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-18 14:36:49 +01:00
Evgeny Yakovlev
ce83ee57f6 block: fix deadlock in bdrv_co_flush
The following commit
    commit 3ff2f67a7c
    Author: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
    Date:   Mon Jul 18 22:39:52 2016 +0300
    block: ignore flush requests when storage is clean
has introduced a regression.

There is a problem that it is still possible for 2 requests to execute
in non sequential fashion and sometimes this results in a deadlock
when bdrv_drain_one/all are called for BDS with such stalled requests.

1. Current flushed_gen and flush_started_gen is 1.
2. Request 1 enters bdrv_co_flush to with write_gen 1 (i.e. the same
   as flushed_gen). It gets past flushed_gen != flush_started_gen and
   sets flush_started_gen to 1 (again, the same it was before).
3. Request 1 yields somewhere before exiting bdrv_co_flush
4. Request 2 enters bdrv_co_flush with write_gen 2. It gets past
   flushed_gen != flush_started_gen and sets flush_started_gen to 2.
5. Request 2 runs to completion and sets flushed_gen to 2
6. Request 1 is resumed, runs to completion and sets flushed_gen to 1.
   However flush_started_gen is now 2.

From here on out flushed_gen is always != to flush_started_gen and all
further requests will wait on flush_queue. This change replaces
flush_started_gen with an explicitly tracked active flush request.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-id: 1471457214-3994-2-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-18 14:36:49 +01:00
Peter Maydell
5844365fe8 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Thu 18 Aug 2016 06:36:16 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  net/net: properly handle multiple packets in net_fill_rstate()
  net: vmxnet: use g_new for pkt initialisation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-18 10:56:41 +01:00
Peter Maydell
4b887ae658 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
Fix 'make docker-test-mingw@fedora'

Peter,

This is the single patch that stalls patchew's mingw testing. Since it
is small and trivial, let's have it in 2.7.

Fam

# gpg: Signature made Wed 17 Aug 2016 13:13:53 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  curl: Cast fd to int for DPRINTF

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-18 10:27:57 +01:00
Zhang Chen
e9e0a5854b net/net: properly handle multiple packets in net_fill_rstate()
When network is busy, we will receive multiple packets at one time. In
that situation, we should keep trying to do the receiving instead of
finalizing only the first packet.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-18 12:20:57 +08:00
Li Qiang
47882fa497 net: vmxnet: use g_new for pkt initialisation
When network transport abstraction layer initialises pkt, the maximum
fragmentation count is not checked. This could lead to an integer
overflow causing a NULL pointer dereference. Replace g_malloc() with
g_new() to catch the multiplication overflow.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-18 12:05:18 +08:00
Fam Zheng
92b6a16087 curl: Cast fd to int for DPRINTF
Currently "make docker-test-mingw@fedora" has a warning like:

    /tmp/qemu-test/src/block/curl.c: In function 'curl_sock_cb':
    /tmp/qemu-test/src/block/curl.c:172:6: warning: format '%d' expects
    argument of type 'int', but argument 4 has type 'curl_socket_t {aka long
    long unsigned int}'
         DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
          ^
    cc1: all warnings being treated as errors

Cast to int to suppress it.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470027888-24381-1-git-send-email-famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2016-08-17 19:57:54 +08:00
Peter Maydell
5f0e775348 Update version for v2.7.0-rc3 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 17:34:30 +01:00
Peter Maydell
9fea273c85 linux-user: Fix llseek with high bit of offset_low set
The llseek syscall takes two 32-bit arguments, offset_high
and offset_low, which must be combined to form a single
64-bit offset. Unfortunately we were combining them with
   (uint64_t)arg2 << 32) | arg3
and arg3 is a signed type; this meant that when promoting
arg3 to a 64-bit type it would be sign-extended. The effect
was that if the offset happened to have bit 31 set then
this bit would get sign-extended into all of bits 63..32.
Explicitly cast arg3 to abi_ulong to avoid the erroneous
sign extension.

Reported-by: Chanho Park <parkch98@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Chanho Park <parkch98@gmail.com>
Message-id: 1470938379-1133-1-git-send-email-peter.maydell@linaro.org
2016-08-16 16:42:03 +01:00
Michal Privoznik
9c37146782 syscall.c: Fix build with older linux-headers
In c5dff280 we tried to make us understand netlink messages more.
So we've added a code that does some translation. However, the
code assumed linux-headers to be at least version 4.4 of it
because most of the symbols there (if not all of them) were added
in just that release. This, however, breaks build on systems with
older versions of the package.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Message-id: 23806aac6db3baf7e2cdab4c62d6e3468ce6b4dc.1471340849.git.mprivozn@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 16:14:48 +01:00
Marc-André Lureau
1451a7a673 qmp-commands.hx: remove outdated note
input-send-event is now stable since
6575ccddf4.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20160811112041.18616-1-marcandre.lureau@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 15:19:52 +01:00
Peter Maydell
725092ede5 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
target-i386: kernel_irqchip=off fix for KVM

# gpg: Signature made Tue 16 Aug 2016 12:55:42 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  target-i386: kvm: Report kvm_pv_unhalt as unsupported w/o kernel_irqchip

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 13:03:16 +01:00
Eduardo Habkost
648774779a target-i386: kvm: Report kvm_pv_unhalt as unsupported w/o kernel_irqchip
The kvm_pv_unhalt feature doesn't work if kernel_irqchip is
disabled, so we need to report it as unsupported.

Tested-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-08-16 08:49:53 -03:00
Thomas Huth
1f8b56e7ce slirp: Rename "struct arphdr" to "struct slirp_arphdr"
struct arphdr is already used by the system headers on OpenBSD
and thus QEMU does not compile here anymore. Fix it by renaming
our struct to slirp_arphdr instead.

Reported-by: Brad Smith
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1471249494-17392-1-git-send-email-thuth@redhat.com
Buglink: https://bugs.launchpad.net/qemu/+bug/1613133
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 12:17:36 +01:00
Marc-André Lureau
1dc8a6695c char: fix waiting for TLS and telnet connection
Since commit d7a04fd7d5, tcp_chr_wait_connected() was introduced,
so vhost-user could wait until a backend started successfully. In
vhost-user case, the chr socket must be plain unix, and the chr+vhost
setup happens synchronously during qemu startup.

However, with TLS and telnet socket, initial socket setup happens
asynchronously, and s->connected is not set after the socket is
accepted. In order for tcp_chr_wait_connected() to not keep accepting
new connections and proceed with the last accepted socket, it can
check for s->ioc instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20160816083332.15088-1-marcandre.lureau@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 11:50:55 +01:00
Daniel P. Berrange
8afc224ffe virtio-gpu: fix missing log.h include file
The virtio-gpu.h file defines a macro VIRTIO_GPU_FILL_CMD
which includes a call to qemu_log_mask, but does not
include qemu/log.h. In a default configure, it is lucky
and gets qemu/log.h indirectly due to the 'log' trace
backend being enabled. If that trace backend is disabled
though, eg

 ./configure --enable-trace-backends=nop

Then the build will fail:

In file included from /home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:19:0:
/home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c: In function ‘virgl_cmd_create_resource_2d’:
/home/berrange/src/virt/qemu/include/hw/virtio/virtio-gpu.h:138:13: error: implicit declaration of function ‘qemu_log_mask’ [-Werror=implicit-function-declaration]
             qemu_log_mask(LOG_GUEST_ERROR,                              \
             ^
/home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:34:5: note: in expansion of macro ‘VIRTIO_GPU_FILL_CMD’
     VIRTIO_GPU_FILL_CMD(c2d);
     ^~~~~~~~~~~~~~~~~~~
/home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:34:5: error: nested extern declaration of ‘qemu_log_mask’ [-Werror=nested-externs]
In file included from /home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:19:0:
/home/berrange/src/virt/qemu/include/hw/virtio/virtio-gpu.h:138:27: error: ‘LOG_GUEST_ERROR’ undeclared (first use in this function)
             qemu_log_mask(LOG_GUEST_ERROR,                              \

[snip many more errors]

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470648700-3474-1-git-send-email-berrange@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 11:21:31 +01:00
Peter Maydell
2d1c8d5456 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160816' into staging
Build fix for the ccw bios (bios itself not rebuilt).

# gpg: Signature made Tue 16 Aug 2016 08:00:16 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160816:
  pc-bios/s390-ccw.img: Fix build

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 10:45:00 +01:00
Peter Maydell
66940d7491 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches for 2.7.0-rc3

# gpg: Signature made Mon 15 Aug 2016 14:55:46 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  iotests: Test case for wrong runtime option types
  block/nbd: Store runtime option values
  block/blkdebug: Store config filename
  block/nbd: Use QemuOpts for runtime options
  block/ssh: Use QemuOpts for runtime options

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 09:32:40 +01:00
Christian Borntraeger
c86c03cfd2 pc-bios/s390-ccw.img: Fix build
Since
commit a9c87304b7 ("build-sys: fix building with make CFLAGS=.. argument")

pc-bios/s390-ccw.img build might fail with

--- snip ---
main.o: In function `virtio_setup':
qemu/pc-bios/s390-ccw/main.c:117: undefined reference to `__stack_chk_fail'
--- snip ---

Changing the CFLAGS to QEMU_CFLAGS does the trick. We also need to
add -fno-strict-aliasing as this was filtered out.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1471258997-5811-1-git-send-email-borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-08-16 08:52:02 +02:00
Peter Maydell
f3b9e787ae Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160815' into staging
ppc patch queue for 2016-08-15

Just a single patch here, I hope this is the last ppc / spapr fix to
squeeze into qemu-2.7.

# gpg: Signature made Mon 15 Aug 2016 07:46:36 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160815:
  ppc: parse cpu features once

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 21:48:03 +01:00
Peter Maydell
e5bfef86fe Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20160812-tag-2' into staging
Xen 2016/08/12, fixed commit message

# gpg: Signature made Sat 13 Aug 2016 00:39:09 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20160812-tag-2:
  xen: handle inbound migration of VMs without ioreq server pages
  Xen: fix converity warning of xen_pt_config_init()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 19:04:51 +01:00
Peter Maydell
aba5d97664 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Fri 12 Aug 2016 11:48:03 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace-events: fix first line comment in trace-events

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 18:27:51 +01:00
Peter Maydell
e57218b6ed pc-bios/optionrom: Fix OpenBSD build with better detection of linker emulation
The various host OSes are irritatingly variable about the name
of the linker emulation we need to pass to ld's -m option to
build the i386 option ROMs. Instead of doing this via a
CONFIG ifdef, check in configure whether any of the emulation
names we know about will work and pass the right answer through
to the makefile. If we can't find one, we fall back to not trying
to build the option ROMs, in the same way we would for a non-x86
host platform.

This is in particular necessary to unbreak the build on OpenBSD,
since it wants a different answer to FreeBSD and we don't have
an existing CONFIG_ variable that distinguishes the two.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Sean Bruno <sbruno@freebsd.org>
Message-id: 1470672688-6754-1-git-send-email-peter.maydell@linaro.org
2016-08-15 17:21:30 +01:00
Pranith Kumar
dfd6076710 softfloat: Fix warn about implicit conversion from int to int8_t
Change the flag type to 'uint8_t' to fix the implicit conversion error.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Message-id: 20160810185502.32015-1-bobby.prani@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 16:15:38 +01:00
Michael S. Tsirkin
94c9cb31c0 Revert "vhost-user: Attempt to fix a race with set_mem_table."
This reverts commit 28ed5ef163.

I still think it's the right thing to do, but
tests have been failing sporadically.

Revert for now, and hope to fix it before the release.

Cc: Prerna Saxena <prerna.saxena@nutanix.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Marc-André Lureau <mlureau@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1471268075-3425-1-git-send-email-mst@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 15:12:21 +01:00
Max Reitz
7d3e693646 iotests: Test case for wrong runtime option types
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:29 +02:00
Max Reitz
03504d05f0 block/nbd: Store runtime option values
Store the runtime option values in the BDRVNBDState so they can later be
used in nbd_refresh_filename() without having to directly access the
options QDict which may contain values of non-string types.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:29 +02:00
Max Reitz
036990d72b block/blkdebug: Store config filename
Store the configuration file's filename so it can later be used in
bdrv_refresh_filename() without having to directly access the options
QDict which may contain a value of a non-string type.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:28 +02:00
Max Reitz
7ccc44fd7d block/nbd: Use QemuOpts for runtime options
Using QemuOpts will prevent qemu from crashing if the input options have
not been validated (which is the case when they are specified on the
command line or in a json: filename) and some have the wrong type.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:28 +02:00
Max Reitz
8a6a80896d block/ssh: Use QemuOpts for runtime options
Using QemuOpts will prevent qemu from crashing if the input options have
not been validated (which is the case when they are specified on the
command line or in a json: filename) and some have the wrong type.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:28 +02:00
Greg Kurz
e703d2f71c ppc: parse cpu features once
Considering that features are converted to global properties and
global properties are automatically applied to every new instance
of created CPU (at object_new() time), there is no point in
parsing cpu_model string every time a CPU created. So move
parsing outside CPU creation loop and do it only once.

Parsing also should be done before any CPU is created so that
features would affect the first CPU a well.

This patch does that for all PowerPC machine types.

It is based on previous work from Bharata:

https://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg07564.html

Signed-off-by: Greg Kurz <groug@kaod.org>
[clg: only kept the fix for the spapr platform. support for other
      platform will be added in 2.8 ]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-13 17:32:58 +10:00
Paul Durrant
b7665c6027 xen: handle inbound migration of VMs without ioreq server pages
VMs created on older versions on Xen will not have been provisioned with
pages to support creation of non-default ioreq servers. In this case
the ioreq server API is not supported and QEMU's only option is to fall
back to using the default ioreq server pages as it did prior to
commit 3996e85c ("Xen: Use the ioreq-server API when available").

This patch therefore changes the code in xen_common.h to stop considering
a failure of xc_hvm_create_ioreq_server() as a hard failure but simply
as an indication that the guest is too old to support the ioreq server
API. Instead a boolean is set to cause reversion to old behaviour such
that the default ioreq server is then used.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
2016-08-12 16:38:30 -07:00
Cao jin
c4f68f0b52 Xen: fix converity warning of xen_pt_config_init()
emu_regs is a pointer, ARRAY_SIZE doesn't return what we expect.
Since the remaining message is enough for debugging, so just remove it.
Also tweaked the message a little.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
2016-08-12 16:38:18 -07:00
Pranith Kumar
6bbbb0ac13 target-arm: Fix warn about implicit conversion
Clang warns about an implicit conversion as follows:

/mnt/devops/code/qemu/target-arm/neon_helper.c:1075:1: warning: implicit conversion from 'int' to 'int8_t' (aka 'signed char') changes value from 128 to -128 [-Wconstant-conversion]
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/mnt/devops/code/qemu/target-arm/neon_helper.c:116:83: note: expanded from macro 'NEON_VOP_ENV'
uint32_t HELPER(glue(neon_,name))(CPUARMState *env, uint32_t arg1, uint32_t arg2) \
                                                                                  ^
/mnt/devops/code/qemu/target-arm/neon_helper.c:106:5: note: expanded from macro '\
NEON_VOP_BODY'
    NEON_DO##n; \
    ^~~~~~~~~~
<scratch space>:21:1: note: expanded from here
NEON_DO4
^~~~~~~~
/mnt/devops/code/qemu/target-arm/neon_helper.c:93:5: note: expanded from macro 'NEON_DO4'
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \
    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/mnt/devops/code/qemu/target-arm/neon_helper.c:1054:23: note: expanded from macro 'NEON_FN'
            dest = (1 << (sizeof(src1) * 8 - 1)); \
                 ~  ~~^~~~~~~~~~~~~~~~~~~~~~~~~

Fix it by casting to appropriate type.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-12 11:12:24 +01:00
Laurent Vivier
e723b87103 trace-events: fix first line comment in trace-events
Documentation is docs/tracing.txt instead of docs/trace-events.txt.

find . -name trace-events -exec \
     sed -i "s?See docs/trace-events.txt for syntax documentation.?See docs/tracing.txt for syntax documentation.?" \
     {} \;

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 1470669081-17860-1-git-send-email-lvivier@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-12 10:36:01 +01:00
Peter Maydell
28b874429b Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.7-7' into staging
Migration:
 - couple of bug fixes
 - couple of typo fixes

# gpg: Signature made Thu 11 Aug 2016 12:36:00 BST
# gpg:                using RSA key 0xEB0B4DFC657EF670
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"
# Primary key fingerprint: 48CA 3722 5FE7 F4A8 B337  2735 1E9A 3B5F 8540 83B6
#      Subkey fingerprint: CC63 D332 AB8F 4617 4529  6534 EB0B 4DFC 657E F670

* remotes/amit-migration/tags/migration-for-2.7-7:
  migration/socket: fix typo in file header
  migration: fix live migration failure with compression
  migration: mmap error check fix
  migration/ram: fix typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-11 17:53:35 +01:00
Peter Maydell
d915b7bb4c Update ancient copyright string in -version output
Currently the -version command line argument prints a string ending
with "Copyright (c) 2003-2008 Fabrice Bellard".  This is now some
eight years out of date; abstract it out of the several places that
print the string and update it to:

Copyright (c) 2003-2016 Fabrice Bellard and the QEMU Project developers

to reflect the work by all the QEMU Project contributors over the
last decade.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1470309276-5012-1-git-send-email-peter.maydell@linaro.org
2016-08-11 16:24:53 +01:00
Peter Maydell
bea048dcb9 Merge remote-tracking branch 'remotes/amit/tags/vser-for-2.7-1' into staging
virtio-console: fix receiving data from guest

# gpg: Signature made Thu 11 Aug 2016 12:17:55 BST
# gpg:                using RSA key 0xEB0B4DFC657EF670
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"
# Primary key fingerprint: 48CA 3722 5FE7 F4A8 B337  2735 1E9A 3B5F 8540 83B6
#      Subkey fingerprint: CC63 D332 AB8F 4617 4529  6534 EB0B 4DFC 657E F670

* remotes/amit/tags/vser-for-2.7-1:
  virtio-console: set frontend open permanently for console devs

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-11 13:26:35 +01:00
Cao jin
474c624ddf migration/socket: fix typo in file header
Code of inet socket & unix socket is merged together.
Also add some newlines, make code block well separated.

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Juan Quintela <quintela@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469696074-12744-4-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 17:03:51 +05:30
Liang Li
787d134fb1 migration: fix live migration failure with compression
Because of commit 11808bb0c4, which remove some condition checks
of 'f->ops->writev_buffer', 'qemu_put_qemu_file' should be enhanced
to clear the 'f_src->iovcnt', or 'f_src->iovcnt' may exceed the
MAX_IOV_SIZE which will break live migration. This should be fixed.

Signed-off-by: Liang Li <liang.z.li@intel.com>
Reported-by: Jinshi Zhang <jinshi.c.zhang@intel.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1470702146-24399-1-git-send-email-liang.z.li@intel.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:59:53 +05:30
Evgeny Yakovlev
0e8b3cdfbc migration: mmap error check fix
mmap man page:
"On success, mmap() returns a pointer to the mapped area. On error, the
value MAP_FAILED (that is, (void *) -1) is returned, and errno  is  set
to indicate the cause of the error."

The check in postcopy_get_tmp_page is definitely wrong and should be
fixed.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Amit Shah <amit.shah@redhat.com>
Message-Id: <1469785705-16670-1-git-send-email-den@openvz.org>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:59:38 +05:30
Cao jin
e110aa919a migration/ram: fix typo
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469776231-23820-1-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:59:33 +05:30
Daniel P. Berrange
bce6261eb2 virtio-console: set frontend open permanently for console devs
The virtio-console.c file handles both serial consoles
and interactive consoles, since they're backed by the
same device model.

Since serial devices are expected to be reliable and
need to notify the guest when the backend is opened
or closed, the virtio-console.c file wires up support
for chardev events. This affects both serial consoles
and interactive consoles, using a network connection
based chardev backend such as 'socket', but not when
using a PTY based backend or plain 'file' backends.

When the host side is not connected the handle_output()
method in virtio-serial-bus.c will drop any data sent
by the guest, before it even reaches the virtio-console.c
code. This means that if the chardev has a logfile
configured, the data will never get logged.

Consider for example, configuring a x86_64 guest with a
plain UART serial port

  -chardev socket,id=charserial1,host=127.0.0.1,port=9001,server,nowait,logfile=console1.log,logappend=on
  -device isa-serial,chardev=charserial1,id=serial1

vs a s390 guest which has to use the virtio-console port

  -chardev socket,id=charconsole1,host=127.0.0.1,port=9000,server,nowait,logfile=console2.log,logappend=on
  -device virtconsole,chardev=charconsole1,id=console1

The isa-serial one gets data written to the log regardless
of whether a client is connected, while the virtioconsole
one only gets data written to the log when a client is
connected.

There is no need for virtio-serial-bus.c to aggressively
drop the data for console devices, as the chardev code is
prefectly capable of discarding the data itself.

So this patch changes virtconsole devices so that they
are always marked as having the host side open. This
ensures that the guest OS will always send any data it
has (Linux virtio-console hvc driver actually ignores
the host open state and sends data regardless, but we
should not rely on that), and also prevents the
virtio-serial-bus code prematurely discarding data.

The behaviour of virtserialport devices is *not* changed,
only virtconsole, because for the former, it is important
that the guest OSknow exactly when the host side is opened
/ closed so it can do any protocol re-negotiation that may
be required.

Fixes bug: https://bugs.launchpad.net/qemu/+bug/1599214

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1470241360-3574-2-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:38:58 +05:30
Peter Maydell
144a6db0b0 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu 11 Aug 2016 11:35:33 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  linux-aio: Handle io_submit() failure gracefully

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-11 11:48:46 +01:00
Kevin Wolf
44713c9e85 linux-aio: Handle io_submit() failure gracefully
It is generally not expected that io_submit() fails other than with
-EAGAIN, but corner cases like SELinux refusing I/O when permissions are
revoked are still possible. In this case, we shouldn't abort, but just
return an I/O error for the request.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 1470741619-23231-1-git-send-email-kwolf@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-11 09:42:35 +01:00
Peter Maydell
d08306dc42 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio/vhost: fixes

some bugfixes for virtio/vhost

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 10 Aug 2016 16:16:22 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  vhost-user: Attempt to fix a race with set_mem_table.
  vhost-user: Introduce a new protocol feature REPLY_ACK.
  vhost: check for vhost_ops before using.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-10 17:14:35 +01:00
Peter Maydell
4b3e5c06a1 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* pc-bios/optionrom/Makefile fixes
* warning fixes for __atomic_load and -1 << x in clang
* missed interrupt fix from Gonglei
* checkpatch fix from Radim and myself

# gpg: Signature made Wed 10 Aug 2016 14:54:31 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  checkpatch: default to success if only warnings
  checkpatch: bump most warnings to errors
  CODING_STYLE, checkpatch: update line length rules
  checkpatch: check for CVS keywords on all sources
  checkpatch: tweak the files in which TABs are checked
  timer: set vm_clock disabled default
  checkpatch: ignore automatically imported Linux headers
  clang: Fix warning reg. expansion to 'defined'
  Disable warn about left shifts of negative values
  atomic: strip "const" from variables declared with typeof
  optionrom: fix compilation with mingw docker target
  optionrom: add -fno-stack-protector
  build-sys: fix building with make CFLAGS=.. argument
  linuxboot_dma: avoid guest ABI breakage on gcc vs. clang compilation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-10 15:59:08 +01:00
Prerna Saxena
28ed5ef163 vhost-user: Attempt to fix a race with set_mem_table.
The set_mem_table command currently does not seek a reply. Hence, there is
no easy way for a remote application to notify to QEMU when it finished
setting up memory, or if there were errors doing so.

As an example:
(1) Qemu sends a SET_MEM_TABLE to the backend (eg, a vhost-user net
application). SET_MEM_TABLE does not require a reply according to the spec.
(2) Qemu commits the memory to the guest.
(3) Guest issues an I/O operation over a new memory region which was configured on (1).
(4) The application has not yet remapped the memory, but it sees the I/O request.
(5) The application cannot satisfy the request because it does not know about those GPAs.

While a guaranteed fix would require a protocol extension (committed separately),
a best-effort workaround for existing applications is to send a GET_FEATURES
message before completing the vhost_user_set_mem_table() call.
Since GET_FEATURES requires a reply, an application that processes vhost-user
messages synchronously would probably have completed the SET_MEM_TABLE before replying.

Signed-off-by: Prerna Saxena <prerna.saxena@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-10 17:47:29 +03:00
Prerna Saxena
ca525ce561 vhost-user: Introduce a new protocol feature REPLY_ACK.
This introduces the VHOST_USER_PROTOCOL_F_REPLY_ACK.

If negotiated, client applications should send a u64 payload in
response to any message that contains the "need_reply" bit set
on the message flags. Setting the payload to "zero" indicates the
command finished successfully. Likewise, setting it to "non-zero"
indicates an error.

Currently implemented only for SET_MEM_TABLE.

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Prerna Saxena <prerna.saxena@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-10 17:47:29 +03:00
Ilya Maximets
ca10203cde vhost: check for vhost_ops before using.
'vhost_set_vring_enable()' tries to call function using pointer to
'vhost_ops' which can be already zeroized in 'vhost_dev_cleanup()'
while vhost disconnection.

Fix that by checking 'vhost_ops' before using. This fixes QEMU crash
on calling 'ethtool -L eth0 combined 2' if vhost disconnected.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-10 17:47:29 +03:00
Peter Maydell
d578cca333 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160810' into staging
ppc patch queue for 2016-08-10

Here are some more last minute PAPR and ppc related fixes for
qemu-2.7.  One patch makes compressed memory dumps work with guest
kernels using page sizes up to 64KiB.  This is important since most
current pseries guests use a 64KiB default page size.  The remainder
fix a regression with handling of CPU aliases which causes serious
problem for libvirt.

# gpg: Signature made Wed 10 Aug 2016 06:44:27 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160810:
  ppc/kvm: Register also a generic spapr CPU core family type
  ppc/kvm: Do not mess up the generic CPU family registration
  hw/ppc/spapr: Look up CPU alias names instead of hard-coding the aliases
  ppc: Introduce a function to look up CPU alias strings
  spapr: remove extra type variable
  ppc64: fix compressed dump with pseries kernel

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-10 15:13:30 +01:00
Paolo Bonzini
141de88654 checkpatch: default to success if only warnings
CHK-level checks have been removed from checkpatch or bumped to
errors, so there is no effect anymore for --strict/--subjective.
Furthermore, even most WARNs have been bumped to errors, with
WARN only reserved to things that patchew probably ought not
to complain about (and that maintainers probably will notice
anyway during review if they are extreme).

Default to exiting with success even if there are WARN-level
failures, and cause --strict to fail for warnings.  Maintainers
that want to have a strict 80-character limit for their subsystem
can add it to a commit hook for example.

The --subjective synonym is removed.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 12:44:51 +02:00
Paolo Bonzini
c2df878325 checkpatch: bump most warnings to errors
This only leaves a warning-level message for the extra-long lines
soft limit.  Everything else is bumped up.

In the future warnings can be added for checks that can have false
positives.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 12:44:48 +02:00
Paolo Bonzini
8fbe3d1fcf CODING_STYLE, checkpatch: update line length rules
Line lengths above 80 characters do exist.  They are rare, but
they happen from time to time.  An ignored rule is worse than an
exception to the rule, so do the latter.

Some on the list expressed their preference for a soft limit that
is slightly lower than 80 characters, to account for extra characters
in unified diffs (including three-way diffs) and for email quoting.
However, there was no consensus on this so keep the 80-character
soft limit and add a hard limit at 90.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 12:22:33 +02:00
Paolo Bonzini
93eb8e31f3 checkpatch: check for CVS keywords on all sources
These should apply to all files, not just C/C++.  Tweak the regular
expression to check for whole words, to avoid false positives on Perl
variables starting with "Id".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 11:10:03 +02:00
Paolo Bonzini
906fb135e4 checkpatch: tweak the files in which TABs are checked
Include Python and shell scripts, and make an exception for Perl
scripts we imported from Linux or elsewhere.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 11:09:54 +02:00
Thomas Huth
d11b268e17 ppc/kvm: Register also a generic spapr CPU core family type
There is a regression with the "-cpu" parameter introduced by
the spapr CPU hotplug code: We used to allow to specify a
"CPU family" name with the "-cpu" parameter when running on KVM so
that the user does not need to know the gory details of the exact
CPU version of the host CPU. For example, it was possible to
use "-cpu POWER8" on a POWER8E host CPU. This behavior does not
work anymore with the new hot-pluggable spapr-cpu-core types.
Since libvirt already heavily depends on the old behavior, this
is quite a severe regression in the QEMU parameter interface.
Let's fix it by supporting a CPU family type for the spapr-cpu-core
on KVM, too.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1363812
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Thomas Huth
9c83fc2e8e ppc/kvm: Do not mess up the generic CPU family registration
The code for registering the sPAPR CPU host core type has been
added inbetween the generic CPU host core type and the generic
CPU family type. That way the instance_init and the class_init
information got lost when registering the generic CPU family
type. Fix it by moving the generic family registration before
the spapr cpu core registration code.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Thomas Huth
4babfaf05d hw/ppc/spapr: Look up CPU alias names instead of hard-coding the aliases
Hard-coding the CPU alias names in the spapr_cores[] array has
two big disadvantages:

1) We register a real type with the CPU alias name in
   spapr_cpu_core_register_types() - this prevents us from registering
   a CPU family name in kvm_ppc_register_host_cpu_type() with the same
   name (as we do it for the non-hotpluggable CPU types).

2) It's quite cumbersome to maintain the aliases here in sync with the
   ppc_cpu_aliases list from target-ppc/cpu-models.c.

So let's simply add proper alias lookup to the spapr cpu core code,
too (by checking whether the given model can be used directly, and
if not by trying to look up the given model as an alias name instead).

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Thomas Huth
caf6316de9 ppc: Introduce a function to look up CPU alias strings
We will need this function to look up the aliases in the
spapr-cpu-core code, too.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Cédric Le Goater
caebf37859 spapr: remove extra type variable
The sPAPR CPU core typename is already available in the upper
block. Let's use it and move the check upward also.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Laurent Vivier
760d88d1d0 ppc64: fix compressed dump with pseries kernel
If we don't provide the page size in target-ppc:cpu_get_dump_info(),
the default one (TARGET_PAGE_SIZE, 4KB) is used to create
the compressed dump. It works fine with Macintosh, but not with
pseries as the kernel default page size is 64KB.

Without this patch, if we generate a compressed dump in the QEMU monitor:

    (qemu) dump-guest-memory -z qemu.dump

This dump cannot be read by crash:

    # crash vmlinux qemu.dump
    ...
    WARNING: cannot translate vmemmap kernel virtual addresses:
             commands requiring page structure contents will fail
    ...

Page_size is used to determine the dumpfile's block size. The
block size needs to be at least the page size, but a multiple of page
size works fine too. For PPC64, linux supports either 4KB or 64KB software
page size. So we define the page_size to 64KB.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:05:05 +10:00
Gonglei
3fdd0ee393 timer: set vm_clock disabled default
(commit 80dcfb8532)
Upon migration, the code use a timer based on vm_clock for 1ns
in the future from post_load to do the event send in case host_connected
differs between migration source and target.

However, it's not guaranteed that the apic is ready to inject irqs into
the guest, and the irq line remained high, resulting in any future interrupts
going unnoticed by the guest as well.

That's because 1) the migration coroutine is not blocked when it get EAGAIN
while reading QEMUFile. 2) The vm_clock is enabled default currently, it doesn't
rely on the calling of vm_start(), that means vm_clock timers can run before
VCPUs are running.

So, let's set the vm_clock disabled default, keep the initial intention of
design for vm_clock timers.

Meanwhile, change the test-aio usecase, using QEMU_CLOCK_REALTIME instead of
QEMU_CLOCK_VIRTUAL as the block code does.

CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: qemu-stable@nongnu.org
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Message-Id: <1470728955-90600-1-git-send-email-arei.gonglei@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Radim Krčmář
93bf13c6df checkpatch: ignore automatically imported Linux headers
Linux uses tabs for indentation and checkpatch always complained about
automatically imported headers.  update-linux-headers.sh could be modified to
expand tabs, but there is no real reason to complain about any ugly code in
Linux headers, so skip all hunk-related checks.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Pranith Kumar
2368635d39 clang: Fix warning reg. expansion to 'defined'
Clang produces the following warning. The warning is detailed here:
https://reviews.llvm.org/D15866. Fix the warning.

/home/pranith/devops/code/qemu/hw/display/qxl.c:507:5: warning: macro expansion producing 'defined' has undefined behavior [-Wexpansion-to-defined]
    ^
/home/pranith/devops/code/qemu/include/ui/qemu-spice.h:46:5: note: expanded from macro 'SPICE_NEEDS_SET_MM_TIME'
  (!defined(SPICE_SERVER_VERSION) || (SPICE_SERVER_VERSION < 0xc06))
    ^
/home/pranith/devops/code/qemu/hw/display/qxl.c:1074:5: warning: macro expansion producing 'defined' has undefined behavior [-Wexpansion-to-defined]
    ^
/home/pranith/devops/code/qemu/include/ui/qemu-spice.h:46:5: note: expanded from macro 'SPICE_NEEDS_SET_MM_TIME'
  (!defined(SPICE_SERVER_VERSION) || (SPICE_SERVER_VERSION < 0xc06))

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Pranith Kumar
435405ac59 Disable warn about left shifts of negative values
It seems like there's no good reason for the compiler to exploit the
undefinedness of left shifts.  GCC explicitly documents that they do not
use at all this possibility and, while they also say this is subject
to change, they have been saying this for 10 years (since the wording
appeared in the GCC 4.0 manual).

Disable these warnings by passing in -Wno-shift-negative-value.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[pranith: forward-port part of patch to 2.7]
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
5927ed846a atomic: strip "const" from variables declared with typeof
With the latest clang, we have the following warning:

    /home/pranith/devops/code/qemu/include/qemu/seqlock.h:62:21: warning: passing 'typeof (*&sl->sequence) *' (aka 'const unsigned int *') to parameter of type 'unsigned int *' discards qualifiers [-Wincompatible-pointer-types-discards-qualifiers]
        return unlikely(atomic_read(&sl->sequence) != start);
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~
    /home/pranith/devops/code/qemu/include/qemu/atomic.h:58:25: note: expanded from macro 'atomic_read'
        __atomic_load(ptr, &_val, __ATOMIC_RELAXED);     \
                           ^~~~~

Stripping const is a bit tricky due to promotions, but it is doable
with either C11 _Generic or GCC extensions.  Use the latter.

Reported-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[pranith: Add conversion for bool type]
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
9d4cd7b4ed optionrom: fix compilation with mingw docker target
Two fixes are needed.  First, mingw does not have -D_FORTIFY_SOURCE,
hence --enable-debug disables optimization.  This is not acceptable
for ROMs, which should override CFLAGS to force inclusion of -O2.

Second, PE stores global constructors and destructors using the
following linker script snippet:

     ___CTOR_LIST__ = .; __CTOR_LIST__ = . ;
			LONG (-1);*(.ctors); *(.ctor); *(SORT(.ctors.*));  LONG (0);
     ___DTOR_LIST__ = .; __DTOR_LIST__ = . ;
			LONG (-1); *(.dtors); *(.dtor); *(SORT(.dtors.*));  LONG (0);

The LONG directives cause the .img files to be 16 bytes too large;
the recently added check to signrom.py catches this.  To fix this,
replace -T and -e options with a linker script.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
b0e8f5cadc optionrom: add -fno-stack-protector
This is required by OpenBSD.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Marc-André Lureau
a9c87304b7 build-sys: fix building with make CFLAGS=.. argument
When calling make with a CFLAGS=.. argument, the -g/-O filter is not
applied, which may result with build failure with ASAN for example. It
could be solved with an 'override' directive on CFLAGS, but that would
actually prevent setting different CFLAGS manually.

Instead, filter the CFLAGS argument from the top-level Makefile (so
you could still call make with a different CFLAGS argument on a
rom/Makefile manually)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20160805082421.21994-2-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
7f2569246c linuxboot_dma: avoid guest ABI breakage on gcc vs. clang compilation
Recent GCC compiles linuxboot_dma.c to 921 bytes, while CentOS 6 needs
1029 and clang needs 1527.  Because the size of the ROM, rounded to the
next 512 bytes, must match, this causes the API to break between a <1K
ROM and one that is bigger.

We want to make the ROM 1.5 KB in size, but it's better to make clang
produce leaner ROMs, because currently it is worryingly close to the limit.
To fix this prevent clang's happy inlining (which -Os cannot prevent).
This only requires adding a noinline attribute.

Second, the patch makes sure that the ROM has enough padding to prevent
ABI breakage on different compilers.  The size is now hardcoded in the file
that is passed to signrom.py, as was the case before commit 6f71b77
("scripts/signrom.py: Allow option ROM checksum script to write the size
header.", 2016-05-23); signrom.py however will still pad the input to
the requested size.  This ensures that the padding goes beyond the
next multiple of 512 if necessary, and also avoids the need for
-fno-toplevel-reorder which clang doesn't support.  signrom.py can then
error out if the requested size is too small for the actual size of the
compiled ROM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Peter Maydell
2bb15bddf2 Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Tue 09 Aug 2016 16:47:32 BST
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  atapi: fix halted DMA reset

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-09 16:53:32 +01:00
John Snow
7f951b2d77 atapi: fix halted DMA reset
Followup to 87ac25fd, this time for ATAPI DMA.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 1470164128-28158-1-git-send-email-jsnow@redhat.com
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2016-08-09 11:47:23 -04:00
Peter Maydell
ab861f3915 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Tue 09 Aug 2016 08:28:39 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  hw/net: Fix a heap overflow in xlnx.xps-ethernetlite
  net: vmxnet3: check for device_active before write
  net: check fragment length during fragmentation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-09 10:44:27 +01:00
chaojianhu
a0d1cbdacf hw/net: Fix a heap overflow in xlnx.xps-ethernetlite
The .receive callback of xlnx.xps-ethernetlite doesn't check the length
of data before calling memcpy. As a result, the NetClientState object in
heap will be overflowed. All versions of qemu with xlnx.xps-ethernetlite
will be affected.

Reported-by: chaojianhu <chaojianhu@hotmail.com>
Signed-off-by: chaojianhu <chaojianhu@hotmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-09 15:27:18 +08:00
Li Qiang
6c352ca9b4 net: vmxnet3: check for device_active before write
Vmxnet3 device emulator does not check if the device is active,
before using it for write. It leads to a use after free issue,
if the vmxnet3_io_bar0_write routine is called after the device is
deactivated. Add check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-09 15:24:56 +08:00
Prasad J Pandit
ead315e43e net: check fragment length during fragmentation
Network transport abstraction layer supports packet fragmentation.
While fragmenting a packet, it checks for more fragments from
packet length and current fragment length. It is susceptible
to an infinite loop, if the current fragment length is zero.
Add check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
CC: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-09 11:45:30 +08:00
Peter Maydell
53279c76cf Update version for v2.7.0-rc2 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 17:26:11 +01:00
Peter Maydell
4977bb09dd Merge remote-tracking branch 'remotes/armbru/tags/pull-monitor-2016-08-08' into staging
Monitor patches for 2016-08-08

# gpg: Signature made Mon 08 Aug 2016 13:24:42 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-monitor-2016-08-08:
  audio: clean up before monitor clean up
  monitor: fix crash when leaving qemu with spice audio

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 15:57:06 +01:00
Peter Maydell
b8dc0fcff1 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
More block layer patches for 2.7.0-rc2

# gpg: Signature made Mon 08 Aug 2016 12:51:30 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  iotests: fix 109
  mirror: finish earlier on error
  tests: Test blockjob IDs
  block/qdev: Let 'drive' property fall back to node name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 15:21:33 +01:00
Peter Maydell
684b6b26af Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160808' into staging
One more s390x fix for a bug in the pci rework.

# gpg: Signature made Mon 08 Aug 2016 11:49:34 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160808:
  s390x/pci: fix null pointer bug

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 14:24:51 +01:00
Peter Maydell
47dc0ec576 hw/sparc/leon3: Don't call get_image_size() on a NULL pointer
get_image_size() doesn't handle being passed a NULL pointer, so
avoid doing that. Spotted by the clang ub sanitizer (which notices
the attempt to pass NULL to open()).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1470391439-28427-1-git-send-email-peter.maydell@linaro.org
2016-08-08 13:58:42 +01:00
Peter Maydell
f5edfcfafb Merge remote-tracking branch 'remotes/armbru/tags/pull-error-2016-08-08' into staging
Error reporting patches for 2016-08-08

# gpg: Signature made Mon 08 Aug 2016 08:14:49 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-error-2016-08-08:
  error: Fix error_printf() calls lacking newlines
  vfio: Use error_report() instead of error_printf() for errors
  checkpatch: Fix newline detection in error_setg() & friends
  error: Strip trailing '\n' from error string arguments (again)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 13:25:35 +01:00
Marc-André Lureau
a384c205ac audio: clean up before monitor clean up
Since aa5cb7f5e, the chardevs are being cleaned up when leaving qemu,
before the atexit() handlers. audio_cleanup() may use the monitor to
notify of changes. For compatibility reasons, let's clean up audio
before the monitor so it keeps emitting monitor events.

The audio_atexit() function is made idempotent (so it can be called
multiple times), and renamed to audio_cleanup(). Since coreaudio
backend is using a 'isAtexit' code path, change it to check
audio_is_cleaning_up() instead, so the path is taken during normal
exit.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20160801112343.29082-3-marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-08-08 14:17:00 +02:00
Marc-André Lureau
2ef45716e1 monitor: fix crash when leaving qemu with spice audio
Since aa5cb7f5e, the chardevs are being cleaned up when leaving
qemu. However, the monitor has still references to them, which may
lead to crashes when running atexit() and trying to send monitor
events:

 #0  0x00007fffdb18f6f5 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
 #1  0x00007fffdb1912fa in __GI_abort () at abort.c:89
 #2  0x0000555555c263e7 in error_exit (err=22, msg=0x555555d47980 <__func__.13537> "qemu_mutex_lock") at util/qemu-thread-posix.c:39
 #3  0x0000555555c26488 in qemu_mutex_lock (mutex=0x5555567a2420) at util/qemu-thread-posix.c:66
 #4  0x00005555558c52db in qemu_chr_fe_write (s=0x5555567a2420, buf=0x55555740dc40 "{\"timestamp\": {\"seconds\": 1470041716, \"microseconds\": 989699}, \"event\": \"SPICE_DISCONNECTED\", \"data\": {\"server\": {\"port\": \"5900\", \"family\": \"ipv4\", \"host\": \"127.0.0.1\"}, \"client\": {\"port\": \"40272\", \"f"..., len=240) at qemu-char.c:280
 #5  0x0000555555787cad in monitor_flush_locked (mon=0x5555567bd9e0) at /home/elmarco/src/qemu/monitor.c:311
 #6  0x0000555555787e46 in monitor_puts (mon=0x5555567bd9e0, str=0x5555567a44ef "") at /home/elmarco/src/qemu/monitor.c:353
 #7  0x00005555557880fe in monitor_json_emitter (mon=0x5555567bd9e0, data=0x5555567c73a0) at /home/elmarco/src/qemu/monitor.c:401
 #8  0x00005555557882d2 in monitor_qapi_event_emit (event=QAPI_EVENT_SPICE_DISCONNECTED, qdict=0x5555567c73a0) at /home/elmarco/src/qemu/monitor.c:472
 #9  0x000055555578838f in monitor_qapi_event_queue (event=QAPI_EVENT_SPICE_DISCONNECTED, qdict=0x5555567c73a0, errp=0x7fffffffca88) at /home/elmarco/src/qemu/monitor.c:497
 #10 0x0000555555c15541 in qapi_event_send_spice_disconnected (server=0x5555571139d0, client=0x5555570d0db0, errp=0x5555566c0428 <error_abort>) at qapi-event.c:1038
 #11 0x0000555555b11bc6 in channel_event (event=3, info=0x5555570d6c00) at ui/spice-core.c:248
 #12 0x00007fffdcc9983a in adapter_channel_event (event=3, info=0x5555570d6c00) at reds.c:120
 #13 0x00007fffdcc99a25 in reds_handle_channel_event (reds=0x5555567a9d60, event=3, info=0x5555570d6c00) at reds.c:324
 #14 0x00007fffdcc7d4c4 in main_dispatcher_self_handle_channel_event (self=0x5555567b28b0, event=3, info=0x5555570d6c00) at main-dispatcher.c:175
 #15 0x00007fffdcc7d5b1 in main_dispatcher_channel_event (self=0x5555567b28b0, event=3, info=0x5555570d6c00) at main-dispatcher.c:194
 #16 0x00007fffdcca7674 in reds_stream_push_channel_event (s=0x5555570d9910, event=3) at reds-stream.c:354
 #17 0x00007fffdcca749b in reds_stream_free (s=0x5555570d9910) at reds-stream.c:323
 #18 0x00007fffdccb5dad in snd_disconnect_channel (channel=0x5555576a89a0) at sound.c:229
 #19 0x00007fffdccb9e57 in snd_detach_common (worker=0x555557739720) at sound.c:1589
 #20 0x00007fffdccb9f0e in snd_detach_playback (sin=0x5555569fe3f8) at sound.c:1602
 #21 0x00007fffdcca3373 in spice_server_remove_interface (sin=0x5555569fe3f8) at reds.c:3387
 #22 0x00005555558ff6e2 in line_out_fini (hw=0x5555569fe370) at audio/spiceaudio.c:152
 #23 0x00005555558f909e in audio_atexit () at audio/audio.c:1754
 #24 0x00007fffdb1941e8 in __run_exit_handlers (status=0, listp=0x7fffdb5175d8 <__exit_funcs>, run_list_atexit=run_list_atexit@entry=true) at exit.c:82
 #25 0x00007fffdb194235 in __GI_exit (status=<optimized out>) at exit.c:104
 #26 0x00007fffdb17b738 in __libc_start_main (main=0x5555558d7874 <main>, argc=67, argv=0x7fffffffcf48, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7fffffffcf38) at ../csu/libc-start.c:323

Add a monitor_cleanup() functions to remove all the monitors before
cleaning up the chardev. Note that we are "losing" some events that
used to be sent during atexit().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20160801112343.29082-2-marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-08-08 14:16:11 +02:00
Peter Maydell
9efaf7f5f5 Merge remote-tracking branch 'remotes/elmarco/tags/leaks-for-2.7-pull-request' into staging
# gpg: Signature made Sun 07 Aug 2016 21:03:14 BST
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/leaks-for-2.7-pull-request:
  ahci: fix sglist leak on retry
  usb: free leaking path
  usb: free USBDevice.strings
  virtio-input: free config list
  qjson: free str
  ahci: free irqs array
  char: free MuxDriver when closing
  char: free the tcp connection data when closing
  numa: do not leak NumaOptions

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 12:41:38 +01:00
Vladimir Sementsov-Ogievskiy
a752e4786c iotests: fix 109
109 iotest is broken for raw after 0965a41e99
[mirror: double performance of the bulk stage if the disc is full]

The problem is with finishing block-job with error: before specified
patch mirror was not very async and it created one big request at disk
start, this request finished with error and qemu produced
BLOCK_JOB_COMPLETED with zero progress.

After 0965a41, mirror starts several smaller requests in parallel, when
BLOCK_JOB_COMPLETED emited we have some successful non-zero progress.

This patch solves the issue by filtering out progress from 109 test
output.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-08 13:05:43 +02:00
Vladimir Sementsov-Ogievskiy
dbaa7b57ec mirror: finish earlier on error
Stop to produce new async copy requests from mirror_iteration if
critical error (error action = BLOCK_ERROR_ACTION_REPORT) detected.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-08 13:05:43 +02:00
Alberto Garcia
9ef8112a24 tests: Test blockjob IDs
Since 7f0317cfc8 we have API to specify the ID of block jobs and we
also guarantee that they are well-formed and unique.

This patch adds tests to check some common scenarios.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-08 13:05:43 +02:00
Kevin Wolf
bd7c41765b block/qdev: Let 'drive' property fall back to node name
If a qdev block device is created with an anonymous BlockBackend (i.e.
a node name rather than a BB name was given for the drive property),
qdev used to return an empty string when the property was read. This
patch fixes it to return the node name instead.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 13:05:43 +02:00
Yi Min Zhao
7fc0abf4cb s390x/pci: fix null pointer bug
We should make sure that it's not NULL firstly.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-08-08 12:47:02 +02:00
Peter Maydell
cbda16c010 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160808' into staging
ppc patch queue 2016-08-08

This batch has several last minute bug fixes to be merged for
qemu-2.7.

# gpg: Signature made Mon 08 Aug 2016 03:40:58 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160808:
  spapr: Fix undefined behaviour in spapr_tce_reset()
  macio: set res_count value to 0 after non-block ATAPI DMA transfers
  spapr: Correctly set query_hotpluggable_cpus hook based on machine version

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 11:32:01 +01:00
Peter Maydell
cf5198d580 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160805' into staging
indirect register lowering

# gpg: Signature made Fri 05 Aug 2016 17:34:53 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160805:
  tcg: Lower indirect registers in a separate pass
  tcg: Require liveness analysis
  tcg: Include liveness info in the dumps
  tcg: Compress dead_temps and mem_temps into a single array
  tcg: Fold life data into TCGOp
  tcg: Reorg TCGOp chaining
  tcg: Compress liveness data to 16 bits

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 10:39:18 +01:00
Markus Armbruster
7ea7d36e34 error: Fix error_printf() calls lacking newlines
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-5-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:01:27 +02:00
Markus Armbruster
fea1c0999a vfio: Use error_report() instead of error_printf() for errors
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:01:18 +02:00
Markus Armbruster
a47eb01098 checkpatch: Fix newline detection in error_setg() & friends
Commit 5d596c2's regexp assumes the error message string is the first
argument.  Correct for error_report(), wrong for all the others.
Relax the regexp to match newline in anywhere.  This might cause
additional false positives.

While there, update the list of error_reporting functions.

Cc: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:01:08 +02:00
Markus Armbruster
df3c286c53 error: Strip trailing '\n' from error string arguments (again)
Commit 9af9e0f, 6daf194d, be62a2eb and 312fd5f got rid of a bunch, but
they keep coming back.  checkpatch.pl tries to flag them since commit
5d596c2, but it's not very good at it.  Offenders tracked down with
Coccinelle script scripts/coccinelle/err-bad-newline.cocci, an updated
version of the script from commit 312fd5f.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-2-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:00:44 +02:00
David Gibson
57c0eb1e0d spapr: Fix undefined behaviour in spapr_tce_reset()
When a TCE table (sPAPR IOMMU context) is in disabled state (which is true
by default for the 64-bit window), it has tcet->nb_table == 0 and
tcet->table == NULL.  However, on system reset, spapr_tce_reset() executes,
which unconditionally calls
        memset(tcet->table, 0, table_size);

We get away with this in practice, because it's a zero length memset(),
but memset() on a NULL pointer is undefined behaviour, so we should not
call it in this case.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-08 10:06:25 +10:00
Mark Cave-Ayland
16275edb34 macio: set res_count value to 0 after non-block ATAPI DMA transfers
res_count should be set to the number of outstanding bytes after a DBDMA
request. Unfortunately this wasn't being set to zero by the non-block
transfer codepath meaning drivers that checked the descriptor result for
such requests (e.g reading the CDROM TOC) would assume from a non-zero result
that the transfer had failed.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-08 09:45:03 +10:00
David Gibson
3c0c47e346 spapr: Correctly set query_hotpluggable_cpus hook based on machine version
Prior to c8721d3 "spapr: Error out when CPU hotplug is attempted on older
pseries machines", attempting to use query-hotpluggable-cpus on pseries-2.6
and earlier machine types would SEGV.

That change fixed that, but due to some unexpected interactions in init
order and a brown-paper-bag worthy failure to test, it accidentally
disabled query-hotpluggable-cpus for all pseries machine types, including
the current one which should allow it.

In fact, query_hotpluggable_cpus needs to be non-NULL when and only when
the dr_cpu_enabled flag in sPAPRMachineClass is set, which makes
dr_cpu_enabled itself redundant.

This patch removes dr_cpu_enabled, instead directly setting
query_hotpluggable_cpus from the machine class_init functions, and using
that to determine the availability of CPU hotplug when necessary.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-08 09:45:03 +10:00
Marc-André Lureau
5839df7b71 ahci: fix sglist leak on retry
ahci-test /x86_64/ahci/io/dma/lba28/retry triggers the following leak:

Direct leak of 16 byte(s) in 1 object(s) allocated from:
    #0 0x7fc4b2a25e20 in malloc (/lib64/libasan.so.3+0xc6e20)
    #1 0x7fc4993bce58 in g_malloc (/lib64/libglib-2.0.so.0+0x4ee58)
    #2 0x556a187d4b34 in ahci_populate_sglist hw/ide/ahci.c:896
    #3 0x556a187d8237 in ahci_dma_prepare_buf hw/ide/ahci.c:1367
    #4 0x556a187b5a1a in ide_dma_cb hw/ide/core.c:844
    #5 0x556a187d7eec in ahci_start_dma hw/ide/ahci.c:1333
    #6 0x556a187b650b in ide_start_dma hw/ide/core.c:921
    #7 0x556a187b61e6 in ide_sector_start_dma hw/ide/core.c:911
    #8 0x556a187b9e26 in cmd_write_dma hw/ide/core.c:1486
    #9 0x556a187bd519 in ide_exec_cmd hw/ide/core.c:2027
    #10 0x556a187d71c5 in handle_reg_h2d_fis hw/ide/ahci.c:1204
    #11 0x556a187d7681 in handle_cmd hw/ide/ahci.c:1254
    #12 0x556a187d168a in check_cmd hw/ide/ahci.c:510
    #13 0x556a187d0afc in ahci_port_write hw/ide/ahci.c:314
    #14 0x556a187d105d in ahci_mem_write hw/ide/ahci.c:435
    #15 0x556a1831d959 in memory_region_write_accessor /home/elmarco/src/qemu/memory.c:525
    #16 0x556a1831dc35 in access_with_adjusted_size /home/elmarco/src/qemu/memory.c:591
    #17 0x556a18323ce3 in memory_region_dispatch_write /home/elmarco/src/qemu/memory.c:1262
    #18 0x556a1828cf67 in address_space_write_continue /home/elmarco/src/qemu/exec.c:2578
    #19 0x556a1828d20b in address_space_write /home/elmarco/src/qemu/exec.c:2635
    #20 0x556a1828d92b in address_space_rw /home/elmarco/src/qemu/exec.c:2737
    #21 0x556a1828daf7 in cpu_physical_memory_rw /home/elmarco/src/qemu/exec.c:2746
    #22 0x556a183068d3 in cpu_physical_memory_write /home/elmarco/src/qemu/include/exec/cpu-common.h:72
    #23 0x556a18308194 in qtest_process_command /home/elmarco/src/qemu/qtest.c:382
    #24 0x556a18309999 in qtest_process_inbuf /home/elmarco/src/qemu/qtest.c:573
    #25 0x556a18309a4a in qtest_read /home/elmarco/src/qemu/qtest.c:585
    #26 0x556a18598b85 in qemu_chr_be_write_impl /home/elmarco/src/qemu/qemu-char.c:387
    #27 0x556a18598c52 in qemu_chr_be_write /home/elmarco/src/qemu/qemu-char.c:399
    #28 0x556a185a2afa in tcp_chr_read /home/elmarco/src/qemu/qemu-char.c:2902
    #29 0x556a18cbaf52 in qio_channel_fd_source_dispatch io/channel-watch.c:84

Follow John Snow recommendation:
  Everywhere else ncq_err is used, it is accompanied by a list cleanup
  except for ncq_cb, which is the case you are fixing here.

  Move the sglist destruction inside of ncq_err and then delete it from
  the other two locations to keep it tidy.

  Call dma_buf_commit in ide_dma_cb after the early return. Though, this
  is also a little wonky because this routine does more than clear the
  list, but it is at the moment the centralized "we're done with the
  sglist" function and none of the other side effects that occur in
  dma_buf_commit will interfere with the reset that occurs from
  ide_restart_bh, I think

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2016-08-08 00:00:41 +04:00
Marc-André Lureau
9ef617246b usb: free leaking path
qdev_get_dev_path() returns an allocated string, free it when no longer
needed.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-08 00:00:36 +04:00
Marc-André Lureau
ec507f1123 usb: free USBDevice.strings
The list is created during instance init and further populated with
usb_desc_set_string(). Clear it when unrealizing the device.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-08 00:00:32 +04:00
Marc-André Lureau
0137a557aa virtio-input: free config list
Clear the list when finalizing. The list is created during realize with
virtio_input_idstr_config() and later by further calls to
virtio_input_init_config() and virtio_input_add_config().

This leak can be reproduced with device-introspect-test -p
/x86_64/device/introspect/concrete.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-08 00:00:28 +04:00
Marc-André Lureau
df37dd6ffe qjson: free str
Release the qstring allocated in qjson_new().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 00:00:24 +04:00
Marc-André Lureau
9d324b0e67 ahci: free irqs array
Each irq is referenced by the IDEBus in ide_init2(), thus we can free
the no longer used array.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Acked-by: John Snow <jsnow@redhat.com>
2016-08-08 00:00:20 +04:00
Marc-André Lureau
1371a36936 char: free MuxDriver when closing
Similarly to other chr_close callbacks, free char type specific data.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 00:00:15 +04:00
Marc-André Lureau
5b498459b4 char: free the tcp connection data when closing
Make sure the connection data got freed when closing the chardev, to
avoid leaks. Introduce tcp_chr_free_connection() to clean all connection
related data, and move some tcp_chr_close() clean-ups there.

(while at it, set write_msgfds_num to 0 when clearing array in
tcp_set_msgfds())

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-08 00:00:11 +04:00
Marc-André Lureau
157e94e8a2 numa: do not leak NumaOptions
In all cases, call qapi_free_NumaOptions(), by using a common ending
block.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-07 23:59:59 +04:00
Richard Henderson
5a18407f55 tcg: Lower indirect registers in a separate pass
Rather than rely on recursion during the middle of register allocation,
lower indirect registers to loads and stores off the indirect base into
plain temps.

For an x86_64 host, with sufficient registers, this results in identical
code, modulo the actual register assignments.

For an i686 host, with insufficient registers, this means that temps can
be (temporarily) spilled to the stack in order to satisfy an allocation.
This as opposed to the possibility of not being able to spill, to allocate
a register for the indirect base, in order to perform a spill.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
c0ef05b5e6 tcg: Require liveness analysis
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
bdfb460ef7 tcg: Include liveness info in the dumps
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
c70fbf0a99 tcg: Compress dead_temps and mem_temps into a single array
We only need two bits per temporary.  Fold the two bytes into one,
and reduce the memory and cachelines required during compilation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
bee158cb4d tcg: Fold life data into TCGOp
Reduce the size of other bitfields to make room.
This reduces the cache footprint of compilation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
dcb8e75870 tcg: Reorg TCGOp chaining
Instead of using -1 as end of chain, use 0, and link through the 0
entry as a fully circular double-linked list.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:18 +05:30
Richard Henderson
a1b3c48d2b tcg: Compress liveness data to 16 bits
This reduces both memory usage and per-insn cacheline usage
during code generation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:17 +05:30
Peter Maydell
51009170d8 tests: Rename qtests which have names ending "error"
We have three qtest tests which have test names ending with "error".
This is awkward because the output of verbose test runs looks like
  /crypto/task/error:                                                  OK
  /crypto/task/thread_error:                                           OK

which gives false positives if you are grepping build logs for
errors by looking for "error:". Since there are only three tests
with this problem, just rename them all to 'failure' instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470307178-22848-1-git-send-email-peter.maydell@linaro.org
2016-08-05 15:27:15 +01:00
Stefan Weil
c025f689a1 wxx: Fix handling of files used for character devices
On Windows, such files were not truncated like on all other hosts.
Now we also test whether truncation is needed when running on Windows.

The append case was also incorrect because it needs a different value
for the desired access mode.

Reported-by: Benjamin David Lunt <fys@fysnet.net>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1470114877-1466-1-git-send-email-sw@weilnetz.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 14:15:14 +01:00
Peter Maydell
bd8eda537f Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches for 2.7.0-rc2

# gpg: Signature made Fri 05 Aug 2016 10:30:12 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  nvme: bump PCI revision
  nvme: fix identify to be NVMe 1.1 compliant
  block: Accept any target node for transactional blockdev-backup

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 13:05:29 +01:00
Peter Maydell
8bfa87a231 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Fri 05 Aug 2016 10:24:34 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  virtio-blk: Remove stale comment about draining
  virtio-blk: Release s->rq queue at system_reset
  throttle: Test burst limits lower than the normal limits
  throttle: Don't allow burst limits to be lower than the normal limits
  block/parallels: check new image size

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 11:44:56 +01:00
Peter Maydell
fbe400d246 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Fri 05 Aug 2016 09:58:50 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  docker: Add "--enable-werror" to configure command line
  docker: Be compatible with older docker

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 11:16:30 +01:00
Peter Maydell
676e844d56 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20160804-tag' into staging
Xen 2016/08/04

# gpg: Signature made Thu 04 Aug 2016 18:43:14 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20160804-tag:
  Xen PCI passthrough: fix passthrough failure when no interrupt pin

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 10:23:12 +01:00
Fam Zheng
27d1b87688 virtio-blk: Remove stale comment about draining
This is stale after commit 6e40b3bf (virtio-blk: Use blk_drain() to
drain IO requests), remove it.

Suggested-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1470278654-13525-3-git-send-email-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Fam Zheng
26307f6aa4 virtio-blk: Release s->rq queue at system_reset
At system_reset, there is no point in retrying the queued request,
because the driver that issued the request won't be around any more.

Analyzed-by: Laszlo Ersek <lersek@redhat.com>
Reported-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1470278654-13525-2-git-send-email-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Alberto Garcia
5fc8c052ce throttle: Test burst limits lower than the normal limits
This checks that making FOO_max lower than FOO is not allowed.

We could also forbid having FOO_max == FOO, but that doesn't have
any odd side effects and it would require us to update several other
tests, so let's keep it simple.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 2f90f9ee58aa14b7bd985f67c5996b06e0ab6c19.1469693110.git.berto@igalia.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Alberto Garcia
aaa1e77ffa throttle: Don't allow burst limits to be lower than the normal limits
Setting FOO_max to a value that is lower than FOO does not make
sense, and it produces odd results depending on the value of
FOO_max_length. Although the user should not set that configuration
in the first place it's better to reject it explicitly.

https://bugzilla.redhat.com/show_bug.cgi?id=1355665

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reported-by: Gu Nini <ngu@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 663d5aca406060e31f80d8113f77b6feee63b919.1469693110.git.berto@igalia.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Klim Kireev
555a608c5d block/parallels: check new image size
Before this patch incorrect image could be created via qemu-img
(Example: qemu-img create -f parallels -o size=4096T hack.img),
incorrect images cannot be used due to overflow in main image structure.

This patch add check of size in image creation.

After reading size it compare it with UINT32_MAX * cluster_size.

Signed-off-by: Klim Kireev <proffk@virtuozzo.mipt.ru>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-id: 1469639300-12155-1-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Christoph Hellwig
47989f1447 nvme: bump PCI revision
The broken Identify implementation in earlier Qemu versions means we
need to blacklist it from issueing the NVMe 1.1 Identify Namespace List
command.  As we want to be able to use it in newer Qemu versions we need
a way to identify those.  Bump the PCI revision as a guest visible
indicator of this bug fix.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-05 10:56:08 +02:00
Christoph Hellwig
03035a23a3 nvme: fix identify to be NVMe 1.1 compliant
NVMe 1.1 requires devices to implement a Namespace List subcommand of
the identify command.  Qemu not only not implements this features, but
also misinterprets it as an Identify Controller request.  Due to this
any OS trying to use the Namespace List will fail the probe.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-05 10:55:52 +02:00
Kevin Wolf
39d990ac38 block: Accept any target node for transactional blockdev-backup
Commit 0d978913 changed blockdev-backup to accept arbitrary node names
instead of device names (i.e. root nodes) for the backup target.
However, it forgot to make the same change in transactions and to update
the documentation. This patch fixes these omissions.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 10:55:14 +02:00
Fam Zheng
4a93f78ed0 docker: Add "--enable-werror" to configure command line
We don't have .git in the docker checkout, add this to enable -Werror
explicitly.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1469453510-658-1-git-send-email-famz@redhat.com
2016-08-05 16:34:55 +08:00
Fam Zheng
95d203cd1e docker: Be compatible with older docker
By not using "--format" with docker images command.

The option is not available on RHEL 7 docker command. Use an awk
matching command instead.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470202928-3392-1-git-send-email-famz@redhat.com>
2016-08-05 16:34:52 +08:00
Bruce Rogers
0968c91ce0 Xen PCI passthrough: fix passthrough failure when no interrupt pin
Commit 5a11d0f7 mistakenly converted a log message into an error
condition when no pin interrupt is found for the pci device being
passed through. Revert that part of the commit.

Signed-off-by: Bruce Rogers <brogers@suse.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
2016-08-04 10:42:48 -07:00
Peter Maydell
42e0d60f16 Merge remote-tracking branch 'remotes/riku/tags/pull-linux-user-20160804' into staging
linux-user important fixes for 2.7

# gpg: Signature made Thu 04 Aug 2016 15:10:57 BST
# gpg:                using RSA key 0xB44890DEDE3C9BC0
# gpg: Good signature from "Riku Voipio <riku.voipio@iki.fi>"
# gpg:                 aka "Riku Voipio <riku.voipio@linaro.org>"
# Primary key fingerprint: FF82 03C8 C391 98AE 0581  41EF B448 90DE DE3C 9BC0

* remotes/riku/tags/pull-linux-user-20160804:
  linux-user: Handle brk() attempts with very large sizes
  linux-user: Fix target_semid_ds structure definition
  linux-user: Don't write off end of new_utsname buffer
  linux-user: Fix memchr() argument in open_self_cmdline()
  linux-user: Use correct alignment for long long on i386 guests

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-04 18:36:05 +01:00
Peter Maydell
ef4330c23b linux-user: Handle brk() attempts with very large sizes
In do_brk(), we were inadvertently truncating the size
of a requested brk() from the guest by putting it into an
'int' variable. This meant that we would incorrectly report
success back to the guest rather than a failed allocation,
typically resulting in the guest then segfaulting. Use
abi_ulong instead.

This fixes a crash in the '31370.cc' test in the gcc libstdc++ test
suite (the test case starts by trying to allocate a very large
size and reduces the size until the allocation succeeds).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:38:17 +03:00
Peter Maydell
005eb2ae1f linux-user: Fix target_semid_ds structure definition
The target_semid_ds structure is not correct for all
architectures: the padding fields should only exist for:
 * 32-bit ABIs
 * x86

It is also misnamed, since it is following the kernel
semid64_ds structure (QEMU doesn't support the legacy
semid_ds structure at all). Rename the struct, provide
a correct generic definition and allow the oddball x86
architecture to provide its own version.

This fixes broken SYSV semaphores for all our 64-bit
architectures except x86 and ppc.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:36:53 +03:00
Peter Maydell
332c9781f6 linux-user: Don't write off end of new_utsname buffer
Use g_strlcpy() rather than strcpy() to copy the uname string
into the structure we return to the guest for the uname syscall.
This avoids overrunning the buffer if the user passed us an
overlong string via the QEMU command line.

We fix a comment typo while we're in the neighbourhood.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:36:26 +03:00
Peter Maydell
ba4b3f668a linux-user: Fix memchr() argument in open_self_cmdline()
In open_self_cmdline() we look for a 0 in the buffer we read
from /prc/self/cmdline. We were incorrectly passing the length
of our buf[] array to memchr() as the length to search, rather
than the number of bytes we actually read into it, which could
be shorter. This was spotted by Coverity (because it could
result in our trying to pass a negative length argument to
write()).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:35:30 +03:00
Peter Maydell
d9fe91d868 linux-user: Use correct alignment for long long on i386 guests
For i386, the ABI specifies that 'long long' (8 byte values)
need only be 4 aligned, but we were requiring them to be
8-aligned. This meant we were laying out the target_epoll_event
structure wrongly. Add a suitable ifdef to abitypes.h to
specify the i386-specific alignment requirement.

Reported-by: Icenowy Zheng <icenowy@aosc.xyz>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:34:59 +03:00
Peter Maydell
09704e6ded Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* xsetbv fix (x86 targets TCG)
* remove unused functions
* qht segfault and memory leak fixes
* NBD fixes
* Fix for non-power-of-2 discard granularity
* Memory hotplug fixes
* Migration regressions
* IOAPIC fixes and (disabled by default) EOI register support
* Various other small fixes

# gpg: Signature made Wed 03 Aug 2016 18:01:05 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (25 commits)
  util: Fix assertion in iov_copy() upon zero 'bytes' and non-zero 'offset'
  qdev: Fix use after free in qdev_init_nofail error path
  Reorganize help output of '-display' option
  x86: ioapic: add support for explicit EOI
  x86: ioapic: ignore level irq during processing
  apic: fix broken migration for kvm-apic
  fw_cfg: Make base type "fw_cfg" abstract
  block: Cater to iscsi with non-power-of-2 discard
  osdep: Document differences in rounding macros
  nbd: Limit nbdflags to 16 bits
  nbd: Fix bad flag detection on server
  i2c: fix migration regression introduced by broadcast support
  mptsas: really fix migration compatibility
  qdist: return "(empty)" instead of NULL when printing an empty dist
  qdist: use g_renew and g_new instead of g_realloc and g_malloc.
  qdist: fix memory leak during binning
  target-i386: fix typo in xsetbv implementation
  qht: do not segfault when gathering stats from an uninitialized qht
  util: Drop inet_listen()
  util: drop unix_nonblocking_connect()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-04 10:24:27 +01:00
Peter Maydell
29b2517ac7 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vnc-20160803-1' into staging
vnc: fixes for "-vnc none".

# gpg: Signature made Wed 03 Aug 2016 16:33:07 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vnc-20160803-1:
  vnc: ensure connection sharing/limits is always configured
  vnc: fix crash when vnc_server_info_get has an error
  vnc: don't crash getting server info if lsock is NULL

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 18:22:51 +01:00
Shmulik Ladkani
e911765cbb util: Fix assertion in iov_copy() upon zero 'bytes' and non-zero 'offset'
In cases where iov_copy() is passed with zero 'bytes' argument and a
non-zero 'offset' argument, nothing gets copied - as expected.

However no copy iterations are performed, so 'offset' is left
unaltered, leading to the final assert(offset == 0) to fail.

Instead, change the loop condition to continue as long as 'offset || bytes',
similar to other iov_* functions.

This ensures 'offset' gets zeroed (even if no actual copy is made),
unless it is beyond end of source iov - which is asserted.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@ravellosystems.com>
Message-Id: <1470130880-1050-1-git-send-email-shmulik.ladkani@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Fam Zheng
0d4104e576 qdev: Fix use after free in qdev_init_nofail error path
Since 69382d8b (qdev: Fix object reference leak in case device.realize()
fails), object_property_set_bool could release the object. The error
path wants the type name, so hold an reference before realizing it.

Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470109301-12966-1-git-send-email-famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Robert Ho
f04ec5afbb Reorganize help output of '-display' option
The '-display' help information is not very correct. This patch sort
it a little.
Also, in its help information, reveals what implicit display option
will be chosen if no definition.

Signed-off-by: Robert Ho <robert.hu@intel.com>
Message-Id: <1469528231-26206-1-git-send-email-robert.hu@intel.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Peter Xu
20fd4b7b6d x86: ioapic: add support for explicit EOI
Some old Linux kernels (upstream before v4.0), or any released RHEL
kernels has problem in sending APIC EOI when IR is enabled. Meanwhile,
many of them only support explicit EOI for IOAPIC, which is only
introduced in IOAPIC version 0x20. This patch provide a way to boost
QEMU IOAPIC to version 0x20, in order for QEMU to correctly receive EOI
messages.

Without boosting IOAPIC version to 0x20, kernels before commit d32932d
("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces")
will have trouble enabling both IR and level-triggered interrupt devices
(like e1000).

To upgrade IOAPIC to version 0x20, we need to specify:

  -global ioapic.version=0x20

To be compatible with old systems, 0x11 will still be the default IOAPIC
version. Here 0x11 and 0x20 are the only versions to be supported.

One thing to mention: this patch only applies to emulated IOAPIC. It
does not affect kernel IOAPIC behavior.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1470059959-372-1-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Peter Xu
f99b86b949 x86: ioapic: ignore level irq during processing
For level triggered interrupts, we will get Remote IRR bit cleared after
guest kernel finished processing specific request. Before that, we
should ignore the same interrupt from triggering again.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1469974685-4144-1-git-send-email-peterx@redhat.com>
[Push new "if" up so that it covers KVM split irqchip as well. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Igor Mammedov
7298d4fd51 apic: fix broken migration for kvm-apic
commit f6e98444 (apic: Use apic_id as apic's migration instance_id)
breaks migration when in kernel irqchip is used for 2.6 and older
machine types.

It applies compat property only for userspace 'apic' type
instead of applying it to all apic types inherited from
'apic-common' type as it was supposed to do.

Fix it by setting compat property 'legacy-instance-id' for
'apic-common' type which affects inherited types (i.e. not
only 'apic' but also 'kvm-apic' types)

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1469800542-11402-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Markus Armbruster
e061fa3ca9 fw_cfg: Make base type "fw_cfg" abstract
Missed when commit 5712db6 split off "fw_cfg_io" and "fw_cfg_mem".

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1469777353-9383-1-git-send-email-armbru@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Eric Blake
b8d0a9804d block: Cater to iscsi with non-power-of-2 discard
Dell Equallogic iSCSI SANs have a very unusual advertised geometry:

$ iscsi-inq -e 1 -c $((0xb0)) iscsi://XXX/0
wsnz:0
maximum compare and write length:1
optimal transfer length granularity:0
maximum transfer length:0
optimal transfer length:0
maximum prefetch xdread xdwrite transfer length:0
maximum unmap lba count:30720
maximum unmap block descriptor count:2
optimal unmap granularity:30720
ugavalid:1
unmap granularity alignment:0
maximum write same length:30720

which says that both the maximum and the optimal discard size
is 15M.  It is not immediately apparent if the device allows
discard requests not aligned to the optimal size, nor if it
allows discards at a finer granularity than the optimal size.

I tried to find details in the SCSI Commands Reference Manual
Rev. A on what valid values of maximum and optimal sizes are
permitted, but while that document mentions a "Block Limits
VPD Page", I couldn't actually find documentation of that page
or what values it would have, or if a SCSI device has an
advertisement of its minimal unmap granularity.  So it is not
obvious to me whether the Dell Equallogic device is compliance
with the SCSI specification.

Fortunately, it is easy enough to support non-power-of-2 sizing,
even if it means we are less efficient than truly possible when
targetting that device (for example, it means that we refuse to
unmap anything that is not a multiple of 15M and aligned to a
15M boundary, even if the device truly does support a smaller
granularity where unmapping actually works).

Reported-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1469129688-22848-5-git-send-email-eblake@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Eric Blake
e9fd416e66 osdep: Document differences in rounding macros
Make it obvious which macros are safe in which situations.

Useful since QEMU_ALIGN_UP and ROUND_UP both purport to do
the same thing, but differ on whether the alignment must be
a power of 2.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1469129688-22848-4-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Eric Blake
7423f41782 nbd: Limit nbdflags to 16 bits
Rather than asserting that nbdflags is within range, just give
it the correct type to begin with :)  nbdflags corresponds to
the per-export portion of NBD Protocol "transmission flags", which
is 16 bits in response to NBD_OPT_EXPORT_NAME and NBD_OPT_GO.

Furthermore, upstream NBD has never passed the global flags to
the kernel via ioctl(NBD_SET_FLAGS) (the ioctl was first
introduced in NBD 2.9.22; then a latent bug in NBD 3.1 actually
tried to OR the global flags with the transmission flags, with
the disaster that the addition of NBD_FLAG_NO_ZEROES in 3.9
caused all earlier NBD 3.x clients to treat every export as
read-only; NBD 3.10 and later intentionally clip things to 16
bits to pass only transmission flags).  Qemu should follow suit,
since the current two global flags (NBD_FLAG_FIXED_NEWSTYLE
and NBD_FLAG_NO_ZEROES) have no impact on the kernel's behavior
during transmission.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>

Message-Id: <1469129688-22848-3-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Eric Blake
5bee0f4717 nbd: Fix bad flag detection on server
Commit ab7c548e added a check for invalid flags, but used an
early return on error instead of properly going through the
cleanup label.

Signed-off-by: Eric Blake <eblake@redhat.com>

Message-Id: <1469129688-22848-2-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Igor Mammedov
71ae65e552 i2c: fix migration regression introduced by broadcast support
QEMU fails migration with following error:

qemu-system-x86_64: Missing section footer for i2c_bus
qemu-system-x86_64: load of migration failed: Invalid argument

when migrating from:
  qemu-system-x86_64-v2.6.0 -m 256M rhel72.img -M pc-i440fx-2.6
to
  qemu-system-x86_64-v2.7.0-rc0 -m 256M rhel72.img -M pc-i440fx-2.6

Regression is added by commit 2293c27f (i2c: implement broadcast write)

Fix it by dropping 'broadcast' VMState introduced by 2293c27f and
reuse broadcast 0x00 address as broadcast flag in bus->saved_address.
Then if there were ongoing broadcast at migration time, set
bus->saved_address to it and at i2c_slave_post_load() time check
for it instead of transfering and using 'broadcast' VMState.

As result of reusing existing saved_address VMState, no compat
glue will be needed to keep forward/backward compatiblity. which
makes fix much less intrusive.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1469623198-177227-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Paolo Bonzini
0b646f44d9 mptsas: really fix migration compatibility
Commit 2e2aa316 removed internal flag msi_in_use, but it
existed in vmstate.  Restore it for migration to older QEMU
versions.

Reported-by: Amit Shah <amit.shah@redhat.com>
Suggested-by: Amit Shah <amit.shah@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Cc: Cao jin <caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Emilio G. Cota
11b7b07f8a qdist: return "(empty)" instead of NULL when printing an empty dist
Printf'ing a NULL string is undefined behaviour. Avoid it.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469459025-23606-4-git-send-email-cota@braap.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Emilio G. Cota
071d405477 qdist: use g_renew and g_new instead of g_realloc and g_malloc.
This is safer against overflow.  g_renew is available in all
version of glib, while g_realloc_n is only available in 2.24.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469459025-23606-3-git-send-email-cota@braap.org>
[Rewritten to use g_new/g_renew. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:42:35 +02:00
Peter Maydell
6eac5f7bad Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160803-1' into staging
usb: bugfixes for xen-usb and ehci, mingw build fix.

# gpg: Signature made Wed 03 Aug 2016 14:04:26 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20160803-1:
  xen: use a common function for pv and hvm guest backend register calls
  xen: drain submit queue in xen-usb before removing device
  xen: when removing a backend don't remove many of them
  ehci: faster frame index calculation for skipped frames
  wxx: Fix compilation of host-libusb.c
  wxx: Fix compiler warning for host-libusb.c

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 14:25:10 +01:00
Daniel P. Berrange
12e29b1682 vnc: ensure connection sharing/limits is always configured
The connection sharing / limits are only set in the
vnc_display_open() method and so missed when VNC is running
with '-vnc none'. This in turn prevents clients being added
to the VNC server with the QMP "add_client" command.

This was introduced in

  commit e5f34cdd2d
  Author: Gerd Hoffmann <kraxel@redhat.com>
  Date:   Thu Oct 2 12:09:34 2014 +0200

      vnc: track & limit connections

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470134726-15697-4-git-send-email-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 15:06:32 +02:00
Daniel P. Berrange
3e7f136d8b vnc: fix crash when vnc_server_info_get has an error
The vnc_server_info_get will allocate the VncServerInfo
struct and then call vnc_init_basic_info_from_server_addr
to populate the basic fields. If this returns an error
though, the qapi_free_VncServerInfo call will then crash
because the VncServerInfo struct instance was not properly
NULL-initialized and thus contains random stack garbage.

 #0  0x00007f1987c8e6f5 in raise () at /lib64/libc.so.6
 #1  0x00007f1987c902fa in abort () at /lib64/libc.so.6
 #2  0x00007f1987ccf600 in __libc_message () at /lib64/libc.so.6
 #3  0x00007f1987cd7d4a in _int_free () at /lib64/libc.so.6
 #4  0x00007f1987cdb2ac in free () at /lib64/libc.so.6
 #5  0x00007f198b654f6e in g_free () at /lib64/libglib-2.0.so.0
 #6  0x0000559193cdcf54 in visit_type_str (v=v@entry=
     0x5591972f14b0, name=name@entry=0x559193de1e29 "host", obj=obj@entry=0x5591961dbfa0, errp=errp@entry=0x7fffd7899d80)
     at qapi/qapi-visit-core.c:255
 #7  0x0000559193cca8f3 in visit_type_VncBasicInfo_members (v=v@entry=
     0x5591972f14b0, obj=obj@entry=0x5591961dbfa0, errp=errp@entry=0x7fffd7899dc0) at qapi-visit.c:12307
 #8  0x0000559193ccb523 in visit_type_VncServerInfo_members (v=v@entry=
     0x5591972f14b0, obj=0x5591961dbfa0, errp=errp@entry=0x7fffd7899e00) at qapi-visit.c:12632
 #9  0x0000559193ccb60b in visit_type_VncServerInfo (v=v@entry=
     0x5591972f14b0, name=name@entry=0x0, obj=obj@entry=0x7fffd7899e48, errp=errp@entry=0x0) at qapi-visit.c:12658
 #10 0x0000559193cb53d8 in qapi_free_VncServerInfo (obj=<optimized out>) at qapi-types.c:3970
 #11 0x0000559193c1e6ba in vnc_server_info_get (vd=0x7f1951498010) at ui/vnc.c:233
 #12 0x0000559193c24275 in vnc_connect (vs=0x559197b2f200, vs=0x559197b2f200, event=QAPI_EVENT_VNC_CONNECTED) at ui/vnc.c:284
 #13 0x0000559193c24275 in vnc_connect (vd=vd@entry=0x7f1951498010, sioc=sioc@entry=0x559196bf9c00, skipauth=skipauth@entry=tru e, websocket=websocket@entry=false) at ui/vnc.c:3039
 #14 0x0000559193c25806 in vnc_display_add_client (id=<optimized out>, csock=<optimized out>, skipauth=<optimized out>)
     at ui/vnc.c:3877
 #15 0x0000559193a90c28 in qmp_marshal_add_client (args=<optimized out>, ret=<optimized out>, errp=0x7fffd7899f90)
     at qmp-marshal.c:105
 #16 0x000055919399c2b7 in handle_qmp_command (parser=<optimized out>, tokens=<optimized out>)
     at /home/berrange/src/virt/qemu/monitor.c:3971
 #17 0x0000559193ce3307 in json_message_process_token (lexer=0x559194ab0838, input=0x559194a6d940, type=JSON_RCURLY, x=111, y=1 2) at qobject/json-streamer.c:105
 #18 0x0000559193cfa90d in json_lexer_feed_char (lexer=lexer@entry=0x559194ab0838, ch=125 '}', flush=flush@entry=false)
     at qobject/json-lexer.c:319
 #19 0x0000559193cfaa1e in json_lexer_feed (lexer=0x559194ab0838, buffer=<optimized out>, size=<optimized out>)
     at qobject/json-lexer.c:369
 #20 0x0000559193ce33c9 in json_message_parser_feed (parser=<optimized out>, buffer=<optimized out>, size=<optimized out>)
     at qobject/json-streamer.c:124
 #21 0x000055919399a85b in monitor_qmp_read (opaque=<optimized out>, buf=<optimized out>, size=<optimized out>)
     at /home/berrange/src/virt/qemu/monitor.c:3987
 #22 0x0000559193a87d00 in tcp_chr_read (chan=<optimized out>, cond=<optimized out>, opaque=0x559194a7d900)
     at qemu-char.c:2895
 #23 0x00007f198b64f703 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
 #24 0x0000559193c484b3 in main_loop_wait () at main-loop.c:213
 #25 0x0000559193c484b3 in main_loop_wait (timeout=<optimized out>) at main-loop.c:258
 #26 0x0000559193c484b3 in main_loop_wait (nonblocking=<optimized out>) at main-loop.c:506
 #27 0x0000559193964c55 in main () at vl.c:1908
 #28 0x0000559193964c55 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4603

This was introduced in

  commit 98481bfcd6
  Author: Eric Blake <eblake@redhat.com>
  Date:   Mon Oct 26 16:34:45 2015 -0600

    vnc: Hoist allocation of VncBasicInfo to callers

which added error reporting for vnc_init_basic_info_from_server_addr
but didn't change the g_malloc calls to g_malloc0.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470134726-15697-3-git-send-email-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 15:06:32 +02:00
Daniel P. Berrange
624cdd46d7 vnc: don't crash getting server info if lsock is NULL
When VNC is started with '-vnc none' there will be no
listener socket present. When we try to populate the
VncServerInfo we'll crash accessing a NULL 'lsock'
field.

 #0  qio_channel_socket_get_local_address (ioc=0x0, errp=errp@entry=0x7ffd5b8aa0f0) at io/channel-socket.c:33
 #1  0x00007f4b9a297d6f in vnc_init_basic_info_from_server_addr (errp=0x7ffd5b8aa0f0, info=0x7f4b9d425460, ioc=<optimized out>)  at ui/vnc.c:146
 #2  vnc_server_info_get (vd=0x7f4b9e858000) at ui/vnc.c:223
 #3  0x00007f4b9a29d318 in vnc_qmp_event (vs=0x7f4b9ef82000, vs=0x7f4b9ef82000, event=QAPI_EVENT_VNC_CONNECTED) at ui/vnc.c:279
 #4  vnc_connect (vd=vd@entry=0x7f4b9e858000, sioc=sioc@entry=0x7f4b9e8b3a20, skipauth=skipauth@entry=true, websocket=websocket @entry=false) at ui/vnc.c:2994
 #5  0x00007f4b9a29e8c8 in vnc_display_add_client (id=<optimized out>, csock=<optimized out>, skipauth=<optimized out>) at ui/v nc.c:3825
 #6  0x00007f4b9a18d8a1 in qmp_marshal_add_client (args=<optimized out>, ret=<optimized out>, errp=0x7ffd5b8aa230) at qmp-marsh al.c:123
 #7  0x00007f4b9a0b53f5 in handle_qmp_command (parser=<optimized out>, tokens=<optimized out>) at /usr/src/debug/qemu-2.6.0/mon itor.c:3922
 #8  0x00007f4b9a348580 in json_message_process_token (lexer=0x7f4b9c78dfe8, input=0x7f4b9c7350e0, type=JSON_RCURLY, x=111, y=5 9) at qobject/json-streamer.c:94
 #9  0x00007f4b9a35cfeb in json_lexer_feed_char (lexer=lexer@entry=0x7f4b9c78dfe8, ch=125 '}', flush=flush@entry=false) at qobj ect/json-lexer.c:310
 #10 0x00007f4b9a35d0ae in json_lexer_feed (lexer=0x7f4b9c78dfe8, buffer=<optimized out>, size=<optimized out>) at qobject/json -lexer.c:360
 #11 0x00007f4b9a348679 in json_message_parser_feed (parser=<optimized out>, buffer=<optimized out>, size=<optimized out>) at q object/json-streamer.c:114
 #12 0x00007f4b9a0b3a1b in monitor_qmp_read (opaque=<optimized out>, buf=<optimized out>, size=<optimized out>) at /usr/src/deb ug/qemu-2.6.0/monitor.c:3938
 #13 0x00007f4b9a186751 in tcp_chr_read (chan=<optimized out>, cond=<optimized out>, opaque=0x7f4b9c7add40) at qemu-char.c:2895
 #14 0x00007f4b92b5c79a in g_main_context_dispatch () from /lib64/libglib-2.0.so.0
 #15 0x00007f4b9a2bb0c0 in glib_pollfds_poll () at main-loop.c:213
 #16 os_host_main_loop_wait (timeout=<optimized out>) at main-loop.c:258
 #17 main_loop_wait (nonblocking=<optimized out>) at main-loop.c:506
 #18 0x00007f4b9a0835cf in main_loop () at vl.c:1934
 #19 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4667

Do an upfront check for a NULL lsock and report an error to
the caller, which matches behaviour from before

  commit 04d2529da2
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Fri Feb 27 16:20:57 2015 +0000

    ui: convert VNC server to use QIOChannelSocket

where getsockname() would be given a FD value -1 and thus report
an error to the caller.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470134726-15697-2-git-send-email-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 15:06:32 +02:00
Juergen Gross
0e39bb022b xen: use a common function for pv and hvm guest backend register calls
Instead of calling xen_be_register() for each supported backend type
for hvm and pv guests in their machine init functions use a common
function in order not to have to add new backends twice.

This at once fixes the error that hvm domains couldn't use the qusb
backend.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Message-id: 1470119552-16170-1-git-send-email-jgross@suse.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 14:52:11 +02:00
Peter Maydell
0cb34ff32e Merge remote-tracking branch 'remotes/berrange/tags/pull-qio-next-2016-08-03-v1' into staging
Merge qio-next 2016-08-03 v1

# gpg: Signature made Wed 03 Aug 2016 10:48:08 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qio-next-2016-08-03-v1:
  io: remove mistaken call to object_ref on QTask

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 13:43:28 +01:00
Peter Maydell
90f54472f4 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160803' into staging
qemu-2.7: ppc patch queue 2016-08-03

Here's the current set of patches (only 2) for spapr, ppc and related
things.  These are important bugfixes for the stabilizing 2.7 tree.

One is for a regression where confusion between x86 only and generic
KVM irq handling resulted in breakage on KVM/Power.  The other is
fixing (yet another) problem in the vcpu hotplug code: older pseries
machine types which don't support vcpu hotplug weren't correctly
advertising that, potentially leading to crashes or other problems.

# gpg: Signature made Wed 03 Aug 2016 06:23:40 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160803:
  kvm-irqchip: only commit route when irqchip is used
  spapr: Error out when CPU hotplug is attempted on older pseries machines

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 11:52:53 +01:00
Daniel P. Berrange
bc35d51077 io: remove mistaken call to object_ref on QTask
The QTask struct is just a standalone struct, not a QOM Object,
so calling object_ref() on it is not appropriate. This results
in mangling the 'destroy' field in the QTask struct, causing
the later call to qtask_free() to try to call the function
at address 0x1, with predictably segfault happy results.

There is in fact no need for ref counting with QTask, as the
call to qtask_abort() or qtask_complete() will automatically
free associated memory.

This fixes the crash shown in

  https://bugs.launchpad.net/qemu/+bug/1589923

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-08-03 10:28:50 +01:00
Juergen Gross
80440ea033 xen: drain submit queue in xen-usb before removing device
When unplugging a device in the Xen pvusb backend drain the submit
queue before deallocation of the control structures. Otherwise there
will be bogus memory accesses when I/O contracts are finished.

Correlated to this issue is the handling of cancel requests: a packet
cancelled will still lead to the call of complete, so add a flag
to the request indicating it should be just dropped on complete.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Message-id: 1470140044-16492-3-git-send-email-jgross@suse.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 10:29:10 +02:00
Juergen Gross
c8e36e865c xen: when removing a backend don't remove many of them
When a Xenstore watch fires indicating a backend has to be removed
don't remove all backends for that domain with the specified device
index, but just the one which has the correct type.

The easiest way to achieve this is to use the already determined
xendev as parameter for xen_be_del_xendev() instead of only the domid
and device index.

This at once removes the open coded QTAILQ_FOREACH_SAVE() in
xen_be_del_xendev() as there is no need to search for the correct
xendev any longer.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Message-id: 1470140044-16492-2-git-send-email-jgross@suse.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 10:29:10 +02:00
Peter Xu
7005f7f81c kvm-irqchip: only commit route when irqchip is used
Reported from Alexey Kardashevskiy:

3f1fea0fb5 "kvm-irqchip: do explicit commit when update irq" produces
a crash on pseries guest running with VFIO on POWER8 machine as it does
not support KVM_CAP_IRQCHIP (KVM_CAP_IRQ_XICS is there instead). At the
result, KVMState::irq_routes is NULL when VFIO calls
kvm_irqchip_commit_routes.

This makes the routing update conditional.

Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-03 13:25:44 +10:00
Bharata B Rao
c8721d3599 spapr: Error out when CPU hotplug is attempted on older pseries machines
CPU hotplug and coldplug aren't supported prior to pseries-2.7.  Further,
earlier machine types don't use CPU core objects at all.  These mean that
query-hotpluggable-cpus and coldplug on older pseries machines will crash
QEMU.  It also means that hotpluggable_cpus flag in query-machines will
be incorrectly set to true for pseries < 2.7, since it is based on the
presence of the query_hotpluggable_cpus hook.

- Don't assign the query_hotpluggable_cpus hook for pseries < 2.7
- query_hotpluggable_cpus should therefore never be called on pseries <
  2.7, so add an assert
- spapr_core_pre_plug() should fail hot/cold plug attempts for pseries <
  2.7, since core objects are never used there
- spapr_core_plug() should therefore never be called for pseries < 2.7, so
  add an assert.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
[dwg: Change from query_hotpluggable_cpus returning NULL for pseries < 2.7
 to not being called at all, reword commit message for accuracy]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-03 13:08:54 +10:00
Peter Maydell
8b54a6a6c6 Merge remote-tracking branch 'remotes/ehabkost/tags/numa-pull-request' into staging
MAINTAINERS: Add Host Memory Backends section

# gpg: Signature made Tue 02 Aug 2016 12:24:56 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/numa-pull-request:
  MAINTAINERS: Add Host Memory Backends section

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-02 12:55:12 +01:00
Evgeny Yakovlev
72aa364b1d ehci: faster frame index calculation for skipped frames
ehci_update_frindex takes time linearly proportional to a number
of uframes to calculate new frame index and raise FLR interrupts,
which is a problem for large amounts of uframes.

If we experience large delays between echi timer callbacks (i.e. because
other periodic handlers have taken a lot of time to complete) we
get a lot of skipped frames which then delay ehci timer callback more
and this leads to deadlocking the system when ehci schedules next
callback to be too soon.

Observable behaviour is qemu consuming 100% host CPU time while guest
is unresponsive. This misbehavior could happen for a while and QEMU does
not get out from this state automatically without the patch.

This change makes ehci_update_frindex execute in constant time.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-id: 1469638520-32706-1-git-send-email-den@openvz.org
CC: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-02 13:35:24 +02:00
Stefan Weil
c16e366464 wxx: Fix compilation of host-libusb.c
libusb.h uses the WINAPI calling convention for all function callbacks.

Cross compilation with Mingw-w64 on Cygwin fails when this calling
convention is missing.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Message-id: 1469775331-7468-1-git-send-email-sw@weilnetz.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-02 13:33:47 +02:00
Stefan Weil
3bf2b3a172 wxx: Fix compiler warning for host-libusb.c
The local variable i is unsed for Windows.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Message-id: 1469775569-7869-1-git-send-email-sw@weilnetz.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-02 13:33:47 +02:00
Eduardo Habkost
4fc264f49c MAINTAINERS: Add Host Memory Backends section
The hostmem code is closely related to the NUMA code, so I am
willing to handle patches to those files and share the work with
Igor (the original author of that code).

Acked-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-08-02 08:22:02 -03:00
Emilio G. Cota
f9dbc19e8b qdist: fix memory leak during binning
In qdist_bin__internal(), to->entries is initialized to a 1-element array,
which we then leak when n == from->n. Fix it.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469459025-23606-2-git-send-email-cota@braap.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Dave Hansen
ba03584f4f target-i386: fix typo in xsetbv implementation
QEMU 2.6 added support for the XSAVE family of instructions, which
includes the XSETBV instruction which allows setting the XCR0
register.

But, when booting Linux kernels with XSAVE support enabled, I was
getting very early crashes where the instruction pointer was set
to 0x3.  I tracked it down to a jump instruction generated by this:

        gen_jmp_im(s->pc - pc_start);

where s->pc is pointing to the instruction after XSETBV and pc_start
is pointing _at_ XSETBV.  Subtract the two and you get 0x3.  Whoops.

The fix is to replace this typo with the pattern found everywhere
else in the file when folks want to end the translation buffer.

Richard Henderson confirmed that this is a bug and that this is the
correct fix.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: qemu-stable@nongnu.org
Cc: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Emilio G. Cota
7266ae91a1 qht: do not segfault when gathering stats from an uninitialized qht
So far, QHT functions assume that the passed qht has previously been
initialized--otherwise they segfault.

This patch makes an exception for qht_statistics_init, with the goal
of simplifying calling code. For instance, qht_statistics_init is
called from the 'info jit' dump, and given that under KVM the TB qht
is never initialized, we get a segfault. Thus, instead of complicating
the 'info jit' code with additional checks, let's allow passing an
uninitialized qht to qht_statistics_init.

While at it, add a test for this to test-qht.

Before the patch (for $ qemu -enable-kvm [...]):
(qemu) info jit
[...]
direct jump count   0 (0%) (2 jumps=0 0%)
Program received signal SIGSEGV, Segmentation fault.

After the patch the "TB hash buckets", "TB hash occupancy"
and "TB hash avg chain" lines are omitted.
(qemu) info jit
[...]
direct jump count   0 (0%) (2 jumps=0 0%)
TB hash buckets     0/0 (-nan% head buckets used)
TB hash occupancy   nan% avg chain occ. Histogram: (null)
TB hash avg chain   nan buckets. Histogram: (null)
[...]

Reported by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469205390-14369-1-git-send-email-cota@braap.org>
[Extract printing statistics to an entirely separate function. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Cao jin
767db021bc util: Drop inet_listen()
Since commit e65c67e4, inet_listen() is not used anymore, and all
inet listen operation goes through QIOChannel.

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Eric Blake <eblake@redhat.com>

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469451771-1173-3-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Cao jin
f8ea7a8656 util: drop unix_nonblocking_connect()
It is never used; all nonblocking connect now goes through
socket_connect(), which calls unix_connect_addr().

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469097213-26441-3-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Cao jin
00432b6953 util: drop inet_nonblocking_connect()
It is never used; all nonblocking connect now goes through
socket_connect(), which calls inet_connect_addr().

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469097213-26441-2-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Paolo Bonzini
3f822cff44 checkpatch: add check for bzero
Tested-By: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Igor Mammedov
056b68af77 fix qemu exit on memory hotplug when allocation fails at prealloc time
When adding hostmem backend at runtime, QEMU might exit with error:
  "os_mem_prealloc: Insufficient free host memory pages available to allocate guest RAM"

It happens due to os_mem_prealloc() not handling errors gracefully.

Fix it by passing errp argument so that os_mem_prealloc() could
report error to callers and undo performed allocation when
os_mem_prealloc() fails.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1469008443-72059-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Greg Kurz
0b21757124 numa: set the memory backend "is_mapped" field
Commit 2aece63 "hostmem: detect host backend memory is being used properly"
added a way to know if a memory backend is busy or available for use. It
caused a slight regression if we pass the same backend to a NUMA node and
to a pc-dimm device:

-m 1G,slots=2,maxmem=2G \
-object memory-backend-ram,size=1G,id=mem-mem1 \
-device pc-dimm,id=dimm-mem1,memdev=mem-mem1 \
-numa node,nodeid=0,memdev=mem-mem1

Before commit 2aece63, this would cause QEMU to print an error message and
to exit gracefully:

qemu-system-ppc64: -device pc-dimm,id=dimm-mem1,memdev=mem-mem1:
    can't use already busy memdev: mem-mem1

Since commit 2aece63, QEMU hits an assertion in the memory code:

qemu-system-ppc64: memory.c:1934: memory_region_add_subregion_common:
    Assertion `!subregion->container' failed.
Aborted

This happens because pc-dimm devices don't use memory_region_is_mapped()
anymore and cannot guess the backend is already used by a NUMA node.

Let's revert to the previous behavior by turning the NUMA code to also
call host_memory_backend_set_mapped() when it uses a backend.

Fixes: 2aece63c8a
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <146891691503.15642.9817215371777203794.stgit@bahia.lan>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Paolo Bonzini
34506b30e4 util/qht: Document memory ordering assumptions
It is naturally expected that some memory ordering should be provided
around qht_insert() and qht_lookup(). Document these assumptions in the
header file and put some comments in the source to denote how that
memory ordering requirements are fulfilled.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[Sergey Fedorov: commit title and message provided;
comment on qht_remove() elided]
Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Message-Id: <20160715175852.30749-2-sergey.fedorov@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Alistair Francis
cc0100f464 MAINTAINERS: Update the Xilinx maintainers
Update the Xilinx maintainers documentation to simplify what we maintain
and cover all of our upstream code.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-01 15:31:32 +01:00
Sean Bruno
ded554cdb4 Fix bsd-user build errors after 8642c1b81e
LINK  sparc-bsd-user/qemu-sparc
bsd-user/main.o: In function `cpu_loop':
/home/sbruno/bsd/qemu/bsd-user/main.c:515: undefined reference to `cpu_sparc_exec'
c++: error: linker command failed with exit code 1 (use -v to see invocation)
gmake[1]: *** [Makefile:197: qemu-sparc] Error 1
gmake: *** [Makefile:204: subdir-sparc-bsd-user] Error 2

  LINK  i386-bsd-user/qemu-i386
bsd-user/main.o: In function `cpu_loop':
/home/sbruno/bsd/qemu/bsd-user/main.c:174: undefined reference to `cpu_x86_exec'
c++: error: linker command failed with exit code 1 (use -v to see invocation)
gmake[1]: *** [Makefile:197: qemu-i386] Error 1
gmake: *** [Makefile:204: subdir-i386-bsd-user] Error 2

Signed-off-by:  Sean Bruno <sbruno@freebsd.org>
Message-id: 20160729160235.64525-1-sbruno@freebsd.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-01 14:30:31 +01:00
Peter Maydell
69d490079f Update version for v2.7.0-rc1 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 15:55:59 +01:00
Aaron Lindsay
71fcd8eb68 avx2 configure: Disable if static build
This avoids a segfault like the following for at least some 4.8 versions
of gcc when configured with --static if avx2 instructions are also
enabled:

	Program received signal SIGSEGV, Segmentation fault.
	buffer_find_nonzero_offset_ifunc () at ./util/cutils.c:333
	333     {
	(gdb) bt
	#0  buffer_find_nonzero_offset_ifunc () at ./util/cutils.c:333
	#1  0x0000000000939c58 in __libc_start_main ()
	#2  0x0000000000419337 in _start ()

Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 15:21:43 +01:00
Sean Bruno
cf4b61d581 Unbreak FreeBSD build after optionrom update.
Update the build flags appropriately for FreeBSD and add the
correct LD_EMULATION type for the FreeBSD build case.

Fixes FreeBSD build error:
	ld: unrecognised emulation mode: elf_i386
	Supported emulations: elf_x86_64_fbsd elf_i386_fbsd
	gmake[1]: *** [Makefile:51: linuxboot_dma.img] Error 1
	gmake: *** [Makefile:229: romsubdir-optionrom] Error 2

Signed-off-by: Sean Bruno <sbruno@freebsd.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 14:25:35 +01:00
Paolo Bonzini
036999e93e optionrom: fix detection of -Wa,-32
The cc-option macro runs $(CC) in -S mode (generate assembly) to avoid a
pointless run of the assembler.  However, this does not work when you want
to detect support for cc->as option passthrough.  clang ignores -Wa unless
-c is provided, and exits successfully even if the -Wa,-32 option is not
supported.

Reported-by: Stefan Hajnoczi <stefanha@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1469043409-14033-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 13:56:52 +01:00
Peter Maydell
c7e9aafe5c Merge remote-tracking branch 'remotes/lalrae/tags/mips-20160729' into staging
MIPS patches 2016-07-29

Changes:
* bug fixes

# gpg: Signature made Fri 29 Jul 2016 09:44:13 BST
# gpg:                using RSA key 0x52118E3C0B29DA6B
# gpg: Good signature from "Leon Alrae <leon.alrae@imgtec.com>"
# Primary key fingerprint: 8DD3 2F98 5495 9D66 35D4  4FC0 5211 8E3C 0B29 DA6B

* remotes/lalrae/tags/mips-20160729:
  target-mips: fix EntryHi.EHINV being cleared on TLB exception
  hw/mips_malta: Fix YAMON API print routine

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 13:05:55 +01:00
Peter Maydell
df2c35902e Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160729' into staging
ppc patch queue 2016-07-29

Here are the current pending ppc and spapr related patches for
qemu-2.7.  Given the freeze status, these are all bugfixes, with two
exceptions:

  * There's some final rework of the vcpu hotplug model.  Specifically
    we add spapr specific code on the generic basis Igor established
    to make cpu_index stable for pseries-2.7 and later machine types.
      - This allows us to remove the limitation that cpu cores had to
        be inserted in linear order, and removed in LIFO order.
      - This is worth merging this late in 2.7 because it will avoid
        considerable future grief with management layers needing to
        discover whether out-of-order hotplug is possible, amongst
        other things.
      - For now we do add a constraint that the initial cpu cannot be
        unplugged.
  * We add two extra testcases to make check, for postcopy and
    drive_del on ppc64.
      - Not strictly bugfixes, but safe, because they don't affect the
        actual code, and increase test coverage.

# gpg: Signature made Fri 29 Jul 2016 05:50:02 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160729:
  tests: add drive_del-test to ppc/ppc64
  spapr: Prevent boot CPU core removal
  ppc: Fix fault PC reporting for lve*/stve* VMX instructions
  test: port postcopy test to ppc64
  Revert "spapr: Ensure CPU cores are added contiguously and removed in LIFO order"
  spapr: init CPUState->cpu_index with index relative to core-id

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 12:37:08 +01:00
Peter Maydell
cbe81c6331 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: cleanups, fixes

a bunch of bugfixes and a couple of cleanups
making these easier and/or making debugging easier

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 29 Jul 2016 04:11:01 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (41 commits)
  mptsas: Fix a migration compatible issue
  vhost: do not update last avail idx on get_vring_base() failure
  vhost: add vhost_net_set_backend()
  vhost-user: add error report in vhost_user_write()
  tests: fix vhost-user-test leak
  tests: plug some leaks in virtio-net-test
  vhost-user: wait until backend init is completed
  char: add and use tcp_chr_wait_connected
  char: add chr_wait_connected callback
  vhost: add assert() to check runtime behaviour
  vhost-net: vhost_migration_done is vhost-user specific
  Revert "vhost-net: do not crash if backend is not present"
  vhost-user: add get_vhost_net() assertions
  vhost-user: keep vhost_net after a disconnection
  vhost-user: check vhost_user_{read,write}() return value
  vhost-user: check qemu_chr_fe_set_msgfds() return value
  vhost-user: call set_msgfds unconditionally
  qemu-char: fix qemu_chr_fe_set_msgfds() crash when disconnected
  vhost: use error_report() instead of fprintf(stderr,...)
  vhost: add missing VHOST_OPS_DEBUG
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 11:57:01 +01:00
Peter Maydell
aa2aac51f0 Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Thu 28 Jul 2016 23:50:37 BST
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  ide: fix halted IO segfault at reset

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 11:01:38 +01:00
Laurent Vivier
059ce0f00a tests: add drive_del-test to ppc/ppc64
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 14:14:15 +10:00
Cao jin
f077f88912 mptsas: Fix a migration compatible issue
My previous commit 2e2aa316 removed internal flag msi_in_use, which
exists in vmstate, use VMSTATE_UNUSED for migration compatibility.

Reported-by: Amit Shah <amit.shah@redhat.com>
Suggested-by: Amit Shah <amit.shah@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
2016-07-29 06:09:55 +03:00
Marc-André Lureau
499c557975 vhost: do not update last avail idx on get_vring_base() failure
The state.num value will probably be 0 in this case, but that
doesn't make sense to update.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 05:47:17 +03:00
Bharata B Rao
62be8b044a spapr: Prevent boot CPU core removal
Boot CPU is assumed to be always present in QEMU code. So
until that assumptions are gone, deny removal request.
In another words, QEMU won't support boot CPU core hot-unplug.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
[dwg: Tweaked error message for clarity]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
Benjamin Herrenschmidt
bcd510b141 ppc: Fix fault PC reporting for lve*/stve* VMX instructions
We forgot to do gen_update_nip() for these like we do with other
helpers. Fix this, but in a more efficient way by passing the RA
to the accessors instead so the overhead is only taken on faults.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
lvivier@redhat.com
aaf89c8a49 test: port postcopy test to ppc64
As userfaultfd syscall is available on powerpc, migration
postcopy can be used.

This patch adds the support needed to test this on powerpc,
instead of using a bootsector to run code to modify memory,
we use a FORTH script in "boot-command" property.

As spapr machine doesn't support "-prom-env" argument
(the nvram is initialized by SLOF and not by QEMU),
"boot-command" is provided to SLOF via a file mapped nvram
(with "-drive file=...,if=pflash")

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
David Gibson
7cdd76132a Revert "spapr: Ensure CPU cores are added contiguously and removed in LIFO order"
This reverts commit 5cbc64de25.

Now that we have stable cpu_index values for pseries-2.7 (and future)
machine types, we can now safely allow hotplug and unplug in any order.

Conflicts:
	hw/ppc/spapr_cpu_core.c

Some conflicts on revert due to some small changes in the inserted
code since the original commit.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
Igor Mammedov
b63578bdb5 spapr: init CPUState->cpu_index with index relative to core-id
It will enshure that cpu_index for a given cpu stays the same
regardless of the order cpus has been created/deleted and so
it would be possible to migrate QEMU instance with out of order
created CPU.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
John Snow
87ac25fd1f ide: fix halted IO segfault at reset
If one attempts to perform a system_reset after a failed IO request
that causes the VM to enter a paused state, QEMU will segfault trying
to free up the pending IO requests.

These requests have already been completed and freed, though, so all
we need to do is NULL them before we enter the paused state.

Existing AHCI tests verify that halted requests are still resumed
successfully after a STOP event.

Analyzed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 1469635201-11918-2-git-send-email-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2016-07-28 17:34:19 -04:00
Marc-André Lureau
950d94ba06 vhost: add vhost_net_set_backend()
Not all vhost-user backends support ops->vhost_net_set_backend(). It is
a nicer to provide an assert/error than to crash trying to
call. Furthermore, it improves a bit the code by hiding vhost_ops
details.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
f6b8571041 vhost-user: add error report in vhost_user_write()
Similar to vhost_user_read() error report, it is useful to have early
error report.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
69179fe2fc tests: fix vhost-user-test leak
Spotted by valgrind.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
1ec3b71cde tests: plug some leaks in virtio-net-test
Found thanks to valgrind.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
c89804d674 vhost-user: wait until backend init is completed
The chardev waits for an initial connection before starting qemu, and
vhost-user should wait for the backend negotiation to be completed
before starting qemu too.

vhost-user is started in the net_vhost_user_event callback, which is
synchronously called after the socket is connected. Use a
VhostUserState.started flag to indicate vhost-user init completed
successfully and qemu can be started.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
d7a04fd7d5 char: add and use tcp_chr_wait_connected
Add a chr_wait_connected for the tcp backend, and use it in the
open_socket() function.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
6b6723c3b5 char: add chr_wait_connected callback
A function to wait on the backend to be connected, to be used in the
following patches.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
8695de0fcf vhost: add assert() to check runtime behaviour
All these functions must be called only after the backend is connected.
They are called from virtio-net.c, after either virtio or link status
change.

The check for nc->peer->link_down should ensure vhost_net_{start,stop}()
are always called between vhost_user_{start,stop}().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
51f7aca973 vhost-net: vhost_migration_done is vhost-user specific
Either the callback is mandatory to implement, in which case an assert()
is more appropriate, or it's not and we can't tell much whether the
function should fail or not (given it's name, I guess it should silently
success by default). Instead, make the implementation mandatory and
vhost-user specific to be more clear about its usage.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
bb12e761e8 Revert "vhost-net: do not crash if backend is not present"
Now that get_vhost_net() returns non-null after a successful
vhost_net_init(), we no longer need to check this case.

This reverts commit ecd34898596c60f79886061618dd7e01001113ad.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
1a5b68cee8 vhost-user: add get_vhost_net() assertions
Add a few assertions to be more explicit about the runtime behaviour
after the previous patch: get_vhost_net() is non-null after
net_vhost_user_init().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
e6bcb1b617 vhost-user: keep vhost_net after a disconnection
Many code paths assume get_vhost_net() returns non-null.

Keep VhostUserState.vhost_net after a successful vhost_net_init(),
instead of freeing it in vhost_net_cleanup().

VhostUserState.vhost_net is thus freed before after being recreated or
on final vhost_user_cleanup() and there is no need to save the acked
features.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
c4843a45e3 vhost-user: check vhost_user_{read,write}() return value
The vhost-user code is quite inconsistent with error handling. Instead
of ignoring some return values of read/write and silently going on with
invalid state (invalid read for example), break the code flow when the
error happened.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
6fab2f3f60 vhost-user: check qemu_chr_fe_set_msgfds() return value
Check qemu_chr_fe_set_msgfds() for errors, to make sure the message to
be sent is correct.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
df3485a148 vhost-user: call set_msgfds unconditionally
It is fine to call set_msgfds() with 0 fd, and ensures any previous fd
array is cleared.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
5c7eaabf65 qemu-char: fix qemu_chr_fe_set_msgfds() crash when disconnected
Calling qemu_chr_fe_set_msgfds() on unconnected socket leads to crash
since s->ioc is NULL in this case. Return an error earlier instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
4afba63120 vhost: use error_report() instead of fprintf(stderr,...)
Let's use qemu proper error reporting API, this ensures the error is
reported at the right place (stderr or monitor), with a conventional
format.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
c640969216 vhost: add missing VHOST_OPS_DEBUG
Add missing VHOST_OPS_DEBUG() logs, for completeness.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
162bba7fa8 vhost: do not assert() on vhost_ops failure
Calling a vhost operation may fail, for example with disconnected
vhost-user backend, but qemu shouldn't abort in this case.

Log an error instead, except on error and cleanup code paths where it
can be mostly ignored.

Let's use a VHOST_OPS_DEBUG macro to easily disable those messages once
disconnected backend stabilizes.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
a06db3ec72 vhost: fix calling vhost_dev_cleanup() after vhost_dev_init()
vhost_net_init() calls vhost_dev_init() and in case of failure, calls
vhost_dev_cleanup() directly. However, the structure is already
partially cleaned on error. Calling vhost_dev_cleanup() again will call
vhost_virtqueue_cleanup() on already clean queues, and causing potential
double-close. Instead, adjust dev->nvqs and simplify vhost_dev_init()
code to not call vhost_virtqueue_cleanup() but vhost_dev_cleanup()
instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
f1a0365b68 vhost-net: always call vhost_dev_cleanup() on failure
vhost_dev_init(), calling vhost backend initialization, should be
cleaned up after failure too. Call vhost_dev_cleanup() in all failure
cases. First, it needs to zero-alloc the struct to avoid the initial
garbage.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
e0547b59dc vhost: make vhost_dev_cleanup() idempotent
It is called on multiple code path, so make it safe to call several
times (note: I don't remember a reproducer here, but a function called
'cleanup' should probably be idempotent in my book)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
5be5f9be72 vhost: fix cleanup on not fully initialized device
If vhost_dev_init() failed, caller may still call vhost_dev_cleanup()
later. However, vhost_dev_cleanup() tries to remove the device from the
list even if it wasn't yet added, which may lead to crashes. Similarly
for the memory listener.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
7b527247f0 vhost: assert the log was cleaned up
Make sure the log was released on cleanup, or it will leak (the
alternative is to call vhost_log_put() unconditionally, but it may hide
some dev state issues).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
9e0bc24fa5 vhost: make vhost_log_put() idempotent
Although not strictly required, it is nice to have vhost_log_put()
safely callable multiple times.

Clear dev->log* when calling vhost_log_put() to make the function
idempotent. This also simplifies a bit the caller work.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
7cb8a9b9f2 vhost: don't assume opaque is a fd, use backend cleanup
vhost-dev opaque isn't necessarily an fd, it can be a chardev when using
vhost-user. Goto fail, so vhost_backend_cleanup() is called to handle
backend cleanup appropriately.

vhost_set_backend_type() should never fail, use an assert().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
9c7d18b3a5 vhost-user: disconnect on HUP
In some cases, qemu_chr_fe_read_all() on HUP event doesn't raise
CHR_EVENT_CLOSED because the read/recv function returns -1 on
disconnected peers (for example with tch_chr_recv, an ECONNRESET errno
overwritten as EIO).

It is simpler to explicitely disconnect on HUP, rising CHR_EVENT_CLOSED
if it wasn't disconnected already.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
d9d261142d vhost-user: minor simplification
Shorten the code and make it more clear by using the specialized
function g_str_has_prefix().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
01edc230d9 misc: indentation
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Prasad J Pandit
1e7aed7014 virtio: check vring descriptor buffer length
virtio back end uses set of buffers to facilitate I/O operations.
An infinite loop unfolds in virtqueue_pop() if a buffer was
of zero size. Add check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-29 00:07:10 +03:00
Marcel Apfelbaum
9a4c0e220d hw/virtio-pci: fix virtio behaviour
Enable transitional virtio devices by default.
Enable virtio-1.0 for devices plugged into
PCIe ports (Root ports or Downstream ports).

Using the virtio-1 mode will remove the limitation
of the number of devices that can be attached to a machine
by removing the need for the IO BAR.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-29 00:07:10 +03:00
Wei Jiangang
be0d9760d7 apb: convert init to realize
Convert a device model where initialization obviously can't fail,
make it implement realize() rather than init().

Signed-off-by: Wei Jiangang <weijg.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-07-29 00:07:09 +03:00
Wei Jiangang
86395eb31f hw/pci-bridge: Convert pxb initialization functions to Error
Firstly, convert pxb_dev_init_common() to Error and rename
it to pxb_dev_realize_common().
Actually, pxb_register_bus() is converted as well.

And then,
convert pxb_dev_initfn() and pxb_pcie_dev_initfn() to Error,
rename them to pxb_dev_realize() and pxb_pcie_dev_realize()
respectively.

Signed-off-by: Wei Jiangang <weijg.fnst@cn.fujitsu.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
16de88a416 hw/apci: handle 64-bit MMIO regions correctly
In build_crs(), the calculation and merging of the ranges already happens
in 64-bit, but the entry boundaries are silently truncated to 32-bit in the
call to aml_dword_memory(). Fix it by handling the 64-bit MMIO ranges separately.
This fixes 64-bit BARs behind PXBs.

Reported-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
2df5a7b52f acpi: refactor pxb crs computation
Instead of always passing both IO and MEM ranges when
computing CRS ranges, define a new CrsRangeSet structure
that include them both.

This is done before introducing a third type of range,
64-bit MEM, so it will be easier to pass them all around.

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
c99cb18eeb hw/acpi: fix a DSDT table issue when a pxb is present.
PXBs do not support hotplug so they don't have a PCNT function.
Since the PXB's PCI root-bus is a child bus of bus 0, the
build_dsdt code will add a call to the corresponding PCNT function.

Fix this by skipping the PCNT call for the above case.
While at it skip also PCIe child buses.

Reported-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
7b346c742c hw/pxb: declare pxb devices as not hot-pluggable
Prevent future issues when hotplug will work for devices
attached to pxbs.

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
2c533c5479 hw/pcie-root-port: Fix PCIe root port initialization
Specify the root port interrupt pin as part of the init
process for cases when msi/msix are not enabled.

Fixes "hw/pci/pci.c:196:23: runtime error: shift exponent -1 is negative"
warning from clang's sanitizer.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Michael S. Tsirkin
6b4495401b pcie: fix link active status bit migration
We changed link status register in pci express endpoint capability
over time. Specifically,

commit b2101eae63 ("pcie: Set the "link
active" in the link status register") set data link layer link active
bit in this register without adding compatibility to old machine types.

When migrating from qemu 2.3 and older this affects xhci devices which
under machine type 2.0 and older have a pci express endpoint capability
even if they are on a pci bus.

Add compatibility flags to make this bit value match what it was under
2.3.

Additionally, to avoid breaking migration from qemu 2.3 and up,
suppress checking link status during migration: this seems sane
since hardware can change link status at any time.

https://bugzilla.redhat.com/show_bug.cgi?id=1352860

Reported-by: Gerd Hoffmann <kraxel@redhat.com>
Fixes: b2101eae63
    ("pcie: Set the "link active" in the link status register")
Cc: qemu-stable@nongnu.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:08 +03:00
Leon Alrae
701074a6fc target-mips: fix EntryHi.EHINV being cleared on TLB exception
While implementing TLB invalidation feature we forgot to modify
part of code responsible for updating EntryHi during TLB exception.
Consequently EntryHi.EHINV is unexpectedly cleared on the exception.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-28 11:24:02 +01:00
Paul Burton
7f81dbb9a0 hw/mips_malta: Fix YAMON API print routine
The print routine provided as part of the in-built bootloader had a bug
in that it attempted to use a jump instruction as part of a loop, but
the target has its upper bits zeroed leading to control flow
transferring to 0xb0000814 rather than the intended 0xbfc00814. Fix this
by using a branch instruction instead, which seems more fit for purpose.

A simple way to test this is to build a Linux kernel with EVA enabled &
attempt to boot it in QEMU. It will attempt to print a message
indicating the configuration mismatch but QEMU would previously
incorrectly jump & wind up printing a continuous stream of the letter E.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-28 11:24:00 +01:00
Peter Maydell
21a21b853a Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 and machine queue, 2016-07-27

Highlights:
* Fixes to allow CPU hotplug/unplug in any order;
* Exit QEMU on invalid global properties.

# gpg: Signature made Wed 27 Jul 2016 15:28:53 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  vl: exit if a bad property value is passed to -global
  qdev: ignore GlobalProperty.errp for hotplugged devices
  machine: Add comment to abort path in machine_set_kernel_irqchip
  Revert "pc: Enforce adding CPUs contiguously and removing them in opposite order"
  pc: Init CPUState->cpu_index with index in possible_cpus[]
  qdev: Fix object reference leak in case device.realize() fails
  exec: Set cpu_index only if it's not been explictly set
  exec: Don't use cpu_index to detect if cpu_exec_init()'s been called
  exec: Reduce CONFIG_USER_ONLY ifdeffenery

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-27 18:18:21 +01:00
Peter Maydell
51313fe4f4 Merge remote-tracking branch 'remotes/stefanha/tags/CVE-2016-5403-virtio-unbounded-allocation-pull-request' into staging
# gpg: Signature made Wed 27 Jul 2016 16:13:02 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/CVE-2016-5403-virtio-unbounded-allocation-pull-request:
  virtio: error out if guest exceeds virtqueue size

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-27 17:26:07 +01:00
Peter Maydell
df5c50a208 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 26 Jul 2016 21:51:38 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  mirror: double performance of the bulk stage if the disc is full
  block/gluster: fix doc in the qapi schema and member name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-27 16:31:01 +01:00
Greg Kurz
03f28efbbb vl: exit if a bad property value is passed to -global
When passing '-global driver=host-powerpc64-cpu,property=compat,value=foo'
on the command line, without this patch, we get the following warning per
device (which means many lines if the guests has many cpus):

qemu-system-ppc64: Warning: can't apply global host-powerpc64-cpu.compat=foo:
    Invalid compatibility mode "foo"

... and QEMU continues execution, ignoring the property.

With this patch, we get a single line:

qemu-system-ppc64: can't apply global host-powerpc64-cpu.compat=foo:
    Invalid compatibility mode "foo"

... and QEMU exits.

The previous behavior is kept for hotplugged devices since we don't want
QEMU to exit when doing device_add.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-27 11:25:06 -03:00
Greg Kurz
b3443f43f4 qdev: ignore GlobalProperty.errp for hotplugged devices
This patch ensures QEMU won't terminate while hotplugging a device if the
global property cannot be set and errp points to error_fatal or error_abort.

While here, it also fixes indentation of the typename argument.

Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-27 11:25:06 -03:00
Greg Kurz
78a3930685 machine: Add comment to abort path in machine_set_kernel_irqchip
We're not supposed to abort when the user passes a bogus value.
Since the checking is done in visit_type_OnOffSplit(), the call
to abort() is legitimate. Let's add a comment to make it
explicit.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-27 11:25:06 -03:00
Stefan Hajnoczi
afd9096eb1 virtio: error out if guest exceeds virtqueue size
A broken or malicious guest can submit more requests than the virtqueue
size permits, causing unbounded memory allocation in QEMU.

The guest can submit requests without bothering to wait for completion
and is therefore not bound by virtqueue size.  This requires reusing
vring descriptors in more than one request, which is not allowed by the
VIRTIO 1.0 specification.

In "3.2.1 Supplying Buffers to The Device", the VIRTIO 1.0 specification
says:

  1. The driver places the buffer into free descriptor(s) in the
     descriptor table, chaining as necessary

and

  Note that the above code does not take precautions against the
  available ring buffer wrapping around: this is not possible since the
  ring buffer is the same size as the descriptor table, so step (1) will
  prevent such a condition.

This implies that placing more buffers into the virtqueue than the
descriptor table size is not allowed.

QEMU is missing the check to prevent this case.  Processing a request
allocates a VirtQueueElement leading to unbounded memory allocation
controlled by the guest.

Exit with an error if the guest provides more requests than the
virtqueue size permits.  This bounds memory allocation and makes the
buggy guest visible to the user.

This patch fixes CVE-2016-5403 and was reported by Zhenhao Hong from 360
Marvel Team, China.

Reported-by: Zhenhao Hong <hongzhenhao@360.cn>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-27 14:04:40 +01:00
Vladimir Sementsov-Ogievskiy
0965a41e99 mirror: double performance of the bulk stage if the disc is full
Mirror can do up to 16 in-flight requests, but actually on full copy
(the whole source disk is non-zero) in-flight is always 1. This happens
as the request is not limited in size: the data occupies maximum available
capacity of s->buf.

The patch limits the size of the request to some artificial constant
(1 Mb here), which is not that big or small. This effectively enables
back parallelism in mirror code as it was designed.

The result is important: the time to migrate 10 Gb disk is reduced from
~350 sec to 170 sec.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468516741-82174-1-git-send-email-vsementsov@virtuozzo.com
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-26 16:23:36 -04:00
Prasanna Kumar Kalever
0a189ffb5e block/gluster: fix doc in the qapi schema and member name
1. qapi @BlockdevOptionsGluster schema member name s/debug_level/debug-level/
2. rearrange the versioning
3. s/server description/servers description/

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-Id: <1469198048-8535-1-git-send-email-prasanna.kalever@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-26 16:23:36 -04:00
Igor Mammedov
9527e7bde5 Revert "pc: Enforce adding CPUs contiguously and removing them in opposite order"
This reverts commit 4da7faaeb0.

Since commit:
  pc: init CPUState->cpu_index with index in possible_cpus[]
cpu_index is stable regardless of the order cpus were created
and QEMU instance stays migratable always so limitation added
by 4da7faaeb could be safely removed.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:13 -03:00
Igor Mammedov
a15d2728a9 pc: Init CPUState->cpu_index with index in possible_cpus[]
It will enshure that cpu_index for a given cpu stays the same
regardless of the order cpus has been created/deleted.

No compat code is needed as for initial cpus index in
possible_cpus[] matches cpu_index that's been auto-allocated
in cpu_exec_init().

Tha same applies for hotplug with cpu-add command if cpus are
added sequentially in increasing order as 'id' matches cpu_index.

If cpu-add had been used for creating out-of-order cpus,
that created unmigratable instance since it were not possible
to start target with the same cpu_index using old way
of migrating instance with hotplugged cpus:

* source QEMU with CLI (-smp 1,maxcpus=3 and cpu-add id=2)
  following set of cpu_index is allocated [0, 1] with
  apics set [0, 2] respectivelly
* target QEMU is started with CLI -smp 2,maxcpus=3
  resulting in set of cpu_index [0, 1] but with
  set of apics [0, 1] wich doesn't match source.

So we don't need compat code in this case as it's never worked
and newelly added device_add support would use stable cpu_index
set by machine to begin with, so it won't have above limitation
and source QEMU could be migrated to destination regardless
of the order cpus were created.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:08 -03:00
Igor Mammedov
69382d8b3e qdev: Fix object reference leak in case device.realize() fails
If device doesn't have parent assined before its realize
is called, device_set_realized() will implicitly set parent
to '/machine/unattached'.

However device_set_realized() may fail after that point at
several other points leaving not realized object dangling
in '/machine/unattached' and as result caller of

  obj = object_new()
    obj->ref == 1
  object_property_set_bool(obj,..., true, "realized",...)
    obj->ref == 2
  if (fail)
      object_unref(obj);
      obj->ref == 1

will get object leak instead of expected object destruction.

Fix it by making device_set_realized() to cleanup after itself
in case of failure.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:04 -03:00
Igor Mammedov
a07f953ef4 exec: Set cpu_index only if it's not been explictly set
It keeps the legacy behavior for all users that doesn't care
about stable cpu_index value, but would allow boards that
would support device_add/device_del to set stable cpu_index
that won't depend on order in which cpus are created/destroyed.

While at that simplify cpu_get_free_index() as cpu_index
generated by USER_ONLY and softmmu variants is the same
since none of the users support cpu-remove so far, except
of not yet released spapr/x86 device_add/delr, which
will be altered by follow up patches to set stable
cpu_index manually.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:01 -03:00
Igor Mammedov
8b1b835035 exec: Don't use cpu_index to detect if cpu_exec_init()'s been called
Instead use QTAIL's tqe_prev field to detect if cpu's been
placed in list by cpu_exec_init() which is always set if
QTAIL element is in list.

Fixes SIGSEGV on failure path in case cpu_index is assigned
by board and cpu.relalize() fails before cpu_exec_init() is called.

In follow up patches, cpu_index will be assigned by boards that
support cpu hot(un)plug and need stable cpu_index that doesn't
depend on order cpus are created/removed.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:00 -03:00
Igor Mammedov
1bc7e522d9 exec: Reduce CONFIG_USER_ONLY ifdeffenery
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:31:58 -03:00
Peter Maydell
c1fdfe9fca Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2016-07-26' into staging
Block patches for 2.7.0-rc1

# gpg: Signature made Tue 26 Jul 2016 18:11:36 BST
# gpg:                using RSA key 0x3BB14202E838ACAD
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40
#      Subkey fingerprint: 58B3 81CE 2DC8 9CF9 9730  EE64 3BB1 4202 E838 ACAD

* remotes/maxreitz/tags/pull-block-2016-07-26:
  iotest: fix python based IO tests
  block: export LUKS specific data to qemu-img info
  crypto: add support for querying parameters for block encryption
  AioContext: correct comments
  qcow2: do not allocate extra memory

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-26 18:22:49 +01:00
Daniel P. Berrange
4c44b4a4c8 iotest: fix python based IO tests
The previous commit refactoring iotests.py:

  commit 6661397446
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Wed Jul 20 14:23:10 2016 +0100

    scripts: refactor the VM class in iotests for reuse

was not properly tested and included a number of broken
bits.

 - The 'event_match' method was not moved into qemu.py
 - The 'self._args' list parameter in QEMUMachine needs
   to be copied otherwise modifications will affect the
   global 'qemu_opts' variable in iotests.py
 - The QEMUQtestMachine class methods had inverted
   parameter order for the super() calls
 - The QEMUQtestMachine class forgot to add
   '-machine accel=qtest'
 - The QEMUQtestMachine class constructor needs to set
   a default 'name' value before using it as it may
   be None
 - The QEMUQtestMachine class constructor needs to use
   named parameters when calling the super constructor
   as it is leaving out some positional parameters.
 - The 'qemu_prog' variable should be a string not a
   list in iotests.py
 - The VM classs constructor needs to use named
   parameters when calling the super constructor
   as it is leaving out some positional parameters.
 - The path to the socket-scm-helper needs to be
   passed into the QEMUMachine class

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1469549767-27249-1-git-send-email-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 18:28:40 +02:00
Daniel P. Berrange
c7c4cf498f block: export LUKS specific data to qemu-img info
The qemu-img info command has the ability to expose format
specific metadata about volumes. Wire up this facility for
the LUKS driver to report on cipher configuration and key
slot usage.

    $ qemu-img info ~/VirtualMachines/demo.luks
    image: /home/berrange/VirtualMachines/demo.luks
    file format: luks
    virtual size: 98M (102760448 bytes)
    disk size: 100M
    encrypted: yes
    Format specific information:
        ivgen alg: plain64
        hash alg: sha1
        cipher alg: aes-128
        uuid: 6ddee74b-3a22-408c-8909-6789d4fa2594
        cipher mode: xts
        slots:
            [0]:
                active: true
                iters: 572706
                key offset: 4096
                stripes: 4000
            [1]:
                active: false
                key offset: 135168
            [2]:
                active: false
                key offset: 266240
            [3]:
                active: false
                key offset: 397312
            [4]:
                active: false
                key offset: 528384
            [5]:
                active: false
                key offset: 659456
            [6]:
                active: false
                key offset: 790528
            [7]:
                active: false
                key offset: 921600
        payload offset: 2097152
        master key iters: 142375

One somewhat undesirable artifact is that the data fields are
printed out in (apparently) random order. This will be addressed
later by changing the way the block layer pretty-prints the
image specific data.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1469192015-16487-3-git-send-email-berrange@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Daniel P. Berrange
40c8502822 crypto: add support for querying parameters for block encryption
When creating new block encryption volumes, we accept a list of
parameters to control the formatting process. It is useful to
be able to query what those parameters were for existing block
devices. Add a qcrypto_block_get_info() method which returns a
QCryptoBlockInfo instance to report this data.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1469192015-16487-2-git-send-email-berrange@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Cao jin
54a16a63d0 AioContext: correct comments
Correct comments of field notify_me

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-id: 1468575858-22975-1-git-send-email-caoj.fnst@cn.fujitsu.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Vladimir Sementsov-Ogievskiy
ebf7bba090 qcow2: do not allocate extra memory
There are no needs to allocate more than one cluster, as we set
avail_out for deflate to one cluster.

Zlib docs (http://www.zlib.net/manual.html) says:
"deflate compresses as much data as possible, and stops when the input
buffer becomes empty or the output buffer becomes full."

So, deflate will not write more than avail_out to output buffer. If
there is not enough space in output buffer for compressed data (it may
be larger than input data) deflate just returns Z_OK. (if all data is
compressed and written to output buffer deflate returns Z_STREAM_END).

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 1468515565-81313-1-git-send-email-vsementsov@virtuozzo.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Peter Maydell
f49ee630d7 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160726' into staging
ppc patch queue 2016-07-26

Here's the current batch of ppc and spapr related patches intended for
qemu-2.7.  Given the late stage in 2.7 development, these are all
bugfixes with one exception:

The "spapr: disintricate core-id from DT semantics" changes the way
ids are assigned in the new core-based hotplug infrastructure.  This
isn't strictly a bugfix, but we've determined that the current way of
assigning core-ids will cause considerable grief with future plans for
cpu hotplug.  Therefore it's better to fix this now, late in 2.7,
before we have a released version with the problematic numbering.

# gpg: Signature made Tue 26 Jul 2016 04:04:57 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160726:
  spapr: disintricate core-id from DT semantics
  target-ppc: add PPC_MFTB flag to e500mc and e5500
  spapr: fix spapr-nvram migration
  hw/ppc/spapr: Make sure to close the htab_fd when migration is canceled
  ppc: Huge page detection mechanism fixes - Episode III

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-26 11:53:47 +01:00
Peter Maydell
a467bb9940 Merge remote-tracking branch 'remotes/mdroth/tags/qga-pull-2016-07-25-tag' into staging
qemu-ga patch queue for 2.7

* fix w32 build failures due to -Werror when building with VSS/fsfreeze
  enabled
* fix leaking for qemu-ga config files in `make check`

# gpg: Signature made Mon 25 Jul 2016 20:01:09 BST
# gpg:                using RSA key 0x3353C9CEF108B584
# gpg: Good signature from "Michael Roth <flukshun@gmail.com>"
# gpg:                 aka "Michael Roth <mdroth@utexas.edu>"
# gpg:                 aka "Michael Roth <mdroth@linux.vnet.ibm.com>"
# Primary key fingerprint: CEAC C9E1 5534 EBAB B82D  3FA0 3353 C9CE F108 B584

* remotes/mdroth/tags/qga-pull-2016-07-25-tag:
  configure: mark qemu-ga VSS includes as system headers
  tests: use static qga config file
  build-sys: link tests/data

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-26 10:53:23 +01:00
Michael Roth
690604f696 configure: mark qemu-ga VSS includes as system headers
As of e4650c81, we do w32 builds with -Werror enabled. Unfortunately
for cases where we enable VSS support in qemu-ga, we still have
warnings generated by VSS includes that ship as part of the Microsoft
VSS SDK.

We can selectively address a number of these warnings using

  #pragma GCC diagnostic ignored ...

but at least one of these:

  warning: ‘typedef’ was ignored in this declaration

resulting from declarations of the form:

  typedef struct Blah { ... };

does not provide a specific command-line/pragma option to disable
warnings of the sort.

To allow VSS builds to succeed, the next-best option is disabling
these warnings on a per-file basis. pragmas like #pragma GCC
system_header can be used to declare subsequent includes/declarations
as being exempt from normal warnings, but this must be done within
a header file.

Since we don't control the VSS SDK, we'd need to rely on a
intermediate header include to accomplish this, and
since different objects in the VSS link target rely on different
headers from the VSS SDK, this would become somewhat of a rat's nest
(though not totally unmanageable).

The next step up in granularity is just marking the entire VSS
SDK include path as system headers via -isystem. This is a bit more
heavy-handed, but since this SDK hasn't changed since 2005, there's
likely little to be gained from selectively disabling warnings
anyway, so we implement that approach here.

This fixes the -Werror failures in both the configure test and the
qga build due to shared reliance on $vss_win32_include. For the
same reason, this also enforces a new dependency on -isystem support
in the C/C++ compiler when building QGA with VSS enabled.

Cc: Thomas Huth <thuth@redhat.com>
Cc: Stefan Weil <sw@weilnetz.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2016-07-25 13:23:18 -05:00
Marc-André Lureau
1741b945f2 tests: use static qga config file
Do not create a leaking temporary file, but use a static file instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2016-07-25 13:23:18 -05:00
Marc-André Lureau
fe31017f79 build-sys: link tests/data
Link a common tests data directory to the build directory.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2016-07-25 13:23:17 -05:00
Greg Kurz
12bf2d33fe spapr: disintricate core-id from DT semantics
The goal of this patch is to have a stable core-id which does not depend
on any DT related semantics, which involve non-obvious computations on
modern PowerPC server cpus.

With this patch, the DT core id is computed on-demand as:

       (core-id / smp_threads) * smt

where smt is the number of threads per core in the host.

This formula should be consolidated in a helper since it is needed in
several places.

Other uses for core-id includes: compute a stable cpu_index (which
allows random order hotplug/unplug without breaking migration) and
NUMA.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 15:43:41 +10:00
Michael Walle
2fff4bad40 target-ppc: add PPC_MFTB flag to e500mc and e5500
According to the e500mc and e5500 core reference manual they have support
for the mftb instruction.

Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 11:18:49 +10:00
lvivier@redhat.com
cf472f48d5 spapr: fix spapr-nvram migration
When spapr-nvram is backed by a file using pflash interface,
migration fails on the destination guest with assert:

    bdrv_co_pwritev: Assertion `!(bs->open_flags & 0x0800)' failed.

This avoids the problem by delaying the pflash update until after
the device loads complete.

This fix is similar to the one for the pflash_cfi01 migration:

    90c647d Fix pflash migration

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 10:19:30 +10:00
Thomas Huth
c573fc03da hw/ppc/spapr: Make sure to close the htab_fd when migration is canceled
When canceling a migration process, we currently do not close the
HTAB migration file descriptor since htab_save_complete() is never
called in that case. So we leave the migration process with a
dangling htab_fd value around, and this causes any further migration
attempts to fail. To fix this issue, simply make sure that the
htab_fd is closed during the migration cleanup stage. And since the
cleanup() function is also called when migration succeeds, we can
also remove the call to close_htab_fd() from the htab_save_complete()
function.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1354341
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 10:19:30 +10:00
Thomas Huth
3d4f253483 ppc: Huge page detection mechanism fixes - Episode III
After already fixing two issues with the huge page detection mechanism
(see commit 159d2e39a8 and 86b50f2e1b), Greg Kurz noticed another
case that caused the guest to crash where QEMU announces huge pages
though they should not be available for the guest:

qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
 -m 1G,slots=4,maxmem=32G
 -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
 -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
 -numa node,nodeid=0 -numa node,nodeid=1

That means if there is a global mem-path option, we still have
to look at the memory-backend objects that have been specified
additionally and return their minimum page size if that value
is smaller than the page size of the main memory.

Reported-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 10:19:30 +10:00
Peter Maydell
2d2e632ad0 Update version for v2.7.0-rc0 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-22 15:32:42 +01:00
Peter Maydell
01a720125f target-sh4: Use glib allocator in movcal helper
Coverity spots that helper_movcal() calls malloc() but doesn't
check for failure. Fix this by switching to the glib allocation
functions, which abort on allocation failure.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1468327859-21385-1-git-send-email-peter.maydell@linaro.org
Acked-by: Aurelien Jarno <aurelien@aurel32.net>
2016-07-22 11:33:24 +01:00
Peter Maydell
e3643d32ee Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.7-6' into staging
Migration:
- Fix a postcopy bug
- Add a testsuite for measuring migration performance

# gpg: Signature made Fri 22 Jul 2016 08:56:44 BST
# gpg:                using RSA key 0xEB0B4DFC657EF670
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"
# Primary key fingerprint: 48CA 3722 5FE7 F4A8 B337  2735 1E9A 3B5F 8540 83B6
#      Subkey fingerprint: CC63 D332 AB8F 4617 4529  6534 EB0B 4DFC 657E F670

* remotes/amit-migration/tags/migration-for-2.7-6:
  tests: introduce a framework for testing migration performance
  scripts: ensure monitor socket has SO_REUSEADDR set
  scripts: set timeout when waiting for qemu monitor connection
  scripts: refactor the VM class in iotests for reuse
  scripts: add a 'debug' parameter to QEMUMonitorProtocol
  scripts: add __init__.py file to scripts/qmp/
  migration: set state to post-migrate on failure

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-22 10:51:32 +01:00
Daniel P. Berrange
409437e16d tests: introduce a framework for testing migration performance
This introduces a moderately general purpose framework for
testing performance of migration.

The initial guest workload is provided by the included 'stress'
program, which is configured to spawn one thread per guest CPU
and run a maximally memory intensive workload. It will loop
over GB of memory, xor'ing each byte with data from a 4k array
of random bytes. This ensures heavy read and write load across
all of guest memory to stress the migration performance. While
running the 'stress' program will record how long it takes to
xor each GB of memory and print this data for later reporting.

The test engine will spawn a pair of QEMU processes, either on
the same host, or with the target on a remote host via ssh,
using the host kernel and a custom initrd built with 'stress'
as the /init binary. Kernel command line args are set to ensure
a fast kernel boot time (< 1 second) between launching QEMU and
the stress program starting execution.

None the less, the test engine will initially wait N seconds for
the guest workload to stablize, before starting the migration
operation. When migration is running, the engine will use pause,
post-copy, autoconverge, xbzrle compression and multithread
compression features, as well as downtime & bandwidth tuning
to encourage completion. If migration completes, the test engine
will wait N seconds again for the guest workooad to stablize on
the target host. If migration does not complete after a preset
number of iterations, it will be aborted.

While the QEMU process is running on the source host, the test
engine will sample the host CPU usage of QEMU as a whole, and
each vCPU thread. While migration is running, it will record
all the stats reported by 'query-migration'. Finally, it will
capture the output of the stress program running in the guest.

All the data produced from a single test execution is recorded
in a structured JSON file. A separate program is then able to
create interactive charts using the "plotly" python + javascript
libraries, showing the characteristics of the migration.

The data output provides visualization of the effect on guest
vCPU workloads from the migration process, the corresponding
vCPU utilization on the host, and the overall CPU hit from
QEMU on the host. This is correlated from statistics from the
migration process, such as downtime, vCPU throttling and iteration
number.

While the tests can be run individually with arbitrary parameters,
there is also a facility for producing batch reports for a number
of pre-defined scenarios / comparisons, in order to be able to
get standardized results across different hardware configurations
(eg TCP vs RDMA, or comparing different VCPU counts / memory
sizes, etc).

To use this, first you must build the initrd image

 $ make tests/migration/initrd-stress.img

To run a a one-shot test with all default parameters

 $ ./tests/migration/guestperf.py > result.json

This has many command line args for varying its behaviour.
For example, to increase the RAM size and CPU count and
bind it to specific host NUMA nodes

 $ ./tests/migration/guestperf.py \
       --mem 4 --cpus 2 \
       --src-mem-bind 0 --src-cpu-bind 0,1 \
       --dst-mem-bind 1 --dst-cpu-bind 2,3 \
       > result.json

Using mem + cpu binding is strongly recommended on NUMA
machines, otherwise the guest performance results will
vary wildly between runs of the test due to lucky/unlucky
NUMA placement, making sensible data analysis impossible.

To make it run across separate hosts:

 $ ./tests/migration/guestperf.py \
       --dst-host somehostname > result.json

To request that post-copy is enabled, with switchover
after 5 iterations

 $ ./tests/migration/guestperf.py \
       --post-copy --post-copy-iters 5 > result.json

Once a result.json file is created, a graph of the data
can be generated, showing guest workload performance per
thread and the migration iteration points:

 $ ./tests/migration/guestperf-plot.py --output result.html \
        --migration-iters --split-guest-cpu result.json

To further include host vCPU utilization and overall QEMU
utilization

 $ ./tests/migration/guestperf-plot.py --output result.html \
        --migration-iters --split-guest-cpu \
	--qemu-cpu --vcpu-cpu result.json

NB, the 'guestperf-plot.py' command requires that you have
the plotly python library installed. eg you must do

 $ pip install --user  plotly

Viewing the result.html file requires that you have the
plotly.min.js file in the same directory as the HTML
output. This js file is installed as part of the plotly
python library, so can be found in

  $HOME/.local/lib/python2.7/site-packages/plotly/offline/plotly.min.js

The guestperf-plot.py program can accept multiple json files
to plot, enabling results from different configurations to
be compared.

Finally, to run the entire standardized set of comparisons

  $ ./tests/migration/guestperf-batch.py \
       --dst-host somehost \
       --mem 4 --cpus 2 \
       --src-mem-bind 0 --src-cpu-bind 0,1 \
       --dst-mem-bind 1 --dst-cpu-bind 2,3
       --output tcp-somehost-4gb-2cpu

will store JSON files from all scenarios in the directory
named tcp-somehost-4gb-2cpu

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-7-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:39 +05:30
Daniel P. Berrange
168ae6c24b scripts: ensure monitor socket has SO_REUSEADDR set
If tests use a TCP based monitor socket, the connection will
go into a TIMED_WAIT state when the test exits. This will
randomly prevent the test from being re-run without a certain
time period. Set the SO_REUSEADDR flag on the socket to ensure
we can immediately re-run the tests

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-6-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:35 +05:30
Daniel P. Berrange
238064621f scripts: set timeout when waiting for qemu monitor connection
If QEMU fails to launch for some reason, the QEMUMonitorProtocol
class accept() method will wait forever in a socket accept call.
Set a timeout of 15 seconds so that we fail more gracefully
instead of hanging the test script forever

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-5-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:28 +05:30
Daniel P. Berrange
6661397446 scripts: refactor the VM class in iotests for reuse
The iotests module has a python class for controlling QEMU
processes. Pull the generic functionality out of this file
and create a scripts/qemu.py module containing a QEMUMachine
class. Put the QTest integration support into a subclass
QEMUQtestMachine.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-4-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:24 +05:30
Daniel P. Berrange
991e7c4650 scripts: add a 'debug' parameter to QEMUMonitorProtocol
Add a 'debug' parameter to the QEMUMonitorProtocol class
which will cause it to print out all JSON strings on
sys.stderr

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-3-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:17 +05:30
Daniel P. Berrange
6f7a4a81ce scripts: add __init__.py file to scripts/qmp/
When searching for modules to load, python will ignore any
sub-directory which does not contain __init__.py. This means
that both scripts and scripts/qmp/ have to be explicitly added
to the python path. By adding a __init__.py file to scripts/qmp,
we only need add scripts/ to the python path and can then simply
do 'from qmp import qmp' to load scripts/qmp/qmp.py.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-2-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:13 +05:30
Dr. David Alan Gilbert
42da5550d6 migration: set state to post-migrate on failure
If a migration fails/is cancelled during the postcopy stage we currently
end up with the runstate as finish-migrate, where it should be post-migrate.
There's a small window in precopy where I think the same thing can
happen, but I've never seen it.

It rarely matters; the only postcopy case is if you restart a migration, which
again is a case that rarely matters in postcopy because it's only
safe to restart the migration if you know the destination hasn't
been running (which you might if you started the destination with -S
and hadn't got around to 'c' ing it before the postcopy failed).
Even then it's a small window but potentially you could hit if
there's a problem loading the devices on the destination.

This corresponds to:
https://bugzilla.redhat.com/show_bug.cgi?id=1355683

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1468601086-32117-1-git-send-email-dgilbert@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:09 +05:30
Peter Maydell
206d0c2436 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: new features, cleanups, fixes

- interrupt remapping for intel iommus
- a bunch of virtio cleanups
- fixes all over the place

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 21 Jul 2016 18:49:30 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (57 commits)
  intel_iommu: avoid unnamed fields
  virtio: Update migration docs
  virtio-gpu: Wrap in vmstate
  virtio-gpu: Use migrate_add_blocker for virgl migration blocking
  virtio-input: Wrap in vmstate
  9pfs: Wrap in vmstate
  virtio-serial: Wrap in vmstate
  virtio-net: Wrap in vmstate
  virtio-balloon: Wrap in vmstate
  virtio-rng: Wrap in vmstate
  virtio-blk: Wrap in vmstate
  virtio-scsi: Wrap in vmstate
  virtio: Migration helper function and macro
  virtio-serial: Remove old migration version support
  virtio-net: Remove old migration version support
  virtio-scsi: Replace HandleOutput typedef
  Revert "mirror: Workaround for unexpected iohandler events during completion"
  virtio-scsi: Call virtio_add_queue_aio
  virtio-blk: Call virtio_add_queue_aio
  virtio: Introduce virtio_add_queue_aio
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-21 20:12:37 +01:00
Michael S. Tsirkin
bc38ee10fc intel_iommu: avoid unnamed fields
Also avoid unnamed fields for portability.
Also, rename VTD_IRTE to VTD_IR_TableEntry for coding
style compliance.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
1a210f631b virtio: Update migration docs
Remove references to register_savevm.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
0fc07498da virtio-gpu: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
de8892215e virtio-gpu: Use migrate_add_blocker for virgl migration blocking
virgl conditionally registers a vmstate as unmigratable when virgl
is enabled; instead use the migrate_add_blocker mechanism.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
428d2ed2c8 virtio-input: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
18e0e5b240 9pfs: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
42e6c0390b virtio-serial: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
290c242845 virtio-net: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
7f1ca9b23b virtio-balloon: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
b607579386 virtio-rng: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
bbded32c64 virtio-blk: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
5a289a2883 virtio-scsi: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
5943124cc0 virtio: Migration helper function and macro
To make conversion of virtio devices to VMState simple
at first add a helper function for the simple virtio_save
case and a helper macro that defines the VMState structure.
These will probably go away or change as more of the virtio
code gets converted.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Dr. David Alan Gilbert
71945ae164 virtio-serial: Remove old migration version support
virtio-serial-bus has had version 3 since 37f95bf3d0 in 0.13-rc0;
it's time to clean it up a bit.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Dr. David Alan Gilbert
76010cb320 virtio-net: Remove old migration version support
virtio-net has had version 11 since 0ce0e8f4 in 2009
(v0.11.0-rc0-1480-g0ce0e8f) - remove the code to support loading
anything earlier.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
209b27bbe9 virtio-scsi: Replace HandleOutput typedef
There is a new common one in virtio.h, use it.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
d4a92a8420 Revert "mirror: Workaround for unexpected iohandler events during completion"
This reverts commit ab27c3b5e7.

The virtio storage device host notifiers now work with
bdrv_drained_begin/end, so we don't need this hack any more.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
1c627137c1 virtio-scsi: Call virtio_add_queue_aio
AIO based handler is more appropriate here because it will then
cooperate with bdrv_drained_begin/end. It is needed by the coming
revert patch.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
0ff841f6d1 virtio-blk: Call virtio_add_queue_aio
AIO based handler is more appropriate here because it will then
cooperate with bdrv_drained_begin/end. It is needed by the coming
revert patch.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
872dd82c83 virtio: Introduce virtio_add_queue_aio
Using this function instead of virtio_add_queue marks the vq as aio
based. This differentiation will be useful in later patches.

Distinguish between virtqueue processing in the iohandler context and main loop
AioContext.  iohandler context is isolated from AioContexts and therefore does
not run during aio_poll().

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
bf1780b0d5 virtio: Add typedef for handle_output
The function pointer signature has been repeated a few times, using a
typedef may make coding easier.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
4684a20410 intel_iommu: disallow kernel-irqchip=on with IR
When user specify "intremap=on" with "-M kernel-irqchip=on", throw error
and then quit.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
54a6c11b20 kvm-all: add trace events for kvm irqchip ops
These will help us monitoring irqchip route activities more easily.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Radim Krčmář
a3f409cb4a intel_iommu: support all masks in interrupt entry cache invalidation
Linux guests do not gracefully handle cases when the invalidation mask
they wanted is not supported, probably because real hardware always
allowed all.

We can just say that all 16 masks are supported, because both
ioapic_iec_notifier and kvm_update_msi_routes_all invalidate all caches.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
3f1fea0fb5 kvm-irqchip: do explicit commit when update irq
In the past, we are doing gsi route commit for each irqchip route
update. This is not efficient if we are updating lots of routes in the
same time. This patch removes the committing phase in
kvm_irqchip_update_msi_route(). Instead, we do explicit commit after all
routes updated.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
e1d4fb2de5 kvm-irqchip: x86: add msi route notify fn
One more IEC notifier is added to let msi routes know about the IEC
changes. When interrupt invalidation happens, all registered msi routes
will be updated for all PCI devices.

Since both vfio and vhost are possible gsi route consumers, this patch
will go one step further to keep them safe in split irqchip mode and
when irqfd is enabled.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
[move trace-events lines into target-i386/trace-events]
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
38d87493f3 kvm-irqchip: i386: add hook for add/remove virq
Adding two hooks to be notified when adding/removing msi routes. There
are two kinds of MSI routes:

- in kvm_irqchip_add_irq_route(): before assigning IRQFD. Used by
  vhost, vfio, etc.

- in kvm_irqchip_send_msi(): when sending direct MSI message, if
  direct MSI not allowed, we will first create one MSI route entry
  in the kernel, then trigger it.

This patch only hooks the first one (irqfd case). We do not need to
take care for the 2nd one, since it's only used by QEMU userspace
(kvm-apic) and the messages will always do in-time translation when
triggered. While we need to note them down for the 1st one, so that we
can notify the kernel when cache invalidation happens.

Also, we do not hook IOAPIC msi routes (we have explicit notifier for
IOAPIC to keep its cache updated). We only need to care about irqfd
users.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
d1f6af6a17 kvm-irqchip: simplify kvm_irqchip_add_msi_route
Changing the original MSIMessage parameter in kvm_irqchip_add_msi_route
into the vector number. Vector index provides more information than the
MSIMessage, we can retrieve the MSIMessage using the vector easily. This
will avoid fetching MSIMessage every time before adding MSI routes.

Meanwhile, the vector info will be used in the coming patches to further
enable gsi route update notifications.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:18 +03:00
Peter Xu
ede9c94acf intel_iommu: add SID validation for IR
This patch enables SID validation. Invalid interrupts will be dropped.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:16 +03:00
Jan Kiszka
28589311b3 intel_iommu: Add support for Extended Interrupt Mode
As neither QEMU nor KVM support more than 255 CPUs so far, this is
simple: we only need to switch the destination ID translation in
vtd_remap_irq_get if EIME is set.

Once CFI support is there, it will have to take EIM into account as
well. So far, nothing to do for this.

This patch allows to use x2APIC in split irqchip mode of KVM.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
[use le32_to_cpu() to retrieve dest_id]
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
e3d9c92507 ioapic: register IOMMU IEC notifier for ioapic
Let IOAPIC the first consumer of x86 IOMMU IEC invalidation
notifiers. This is only used for split irqchip case, when vIOMMU
receives IR invalidation requests, IOAPIC will be notified to update
kernel irq routes. For simplicity, we just update all IOAPIC routes,
even if the invalidated entries are not IOAPIC ones.

Since now we are creating IOMMUs using "-device" parameter, IOMMU
device will be created after IOAPIC.  We need to do the registration
after machine done by leveraging machine_done notifier.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
02a2cbc872 x86-iommu: introduce IEC notifiers
This patch introduces x86 IOMMU IEC (Interrupt Entry Cache)
invalidation notifier list. When vIOMMU receives IEC invalidate
request, all the registered units will be notified with specific
invalidation requests.

Intel IOMMU is the first provider that generates such a event.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
8b5ed7dffa intel_iommu: add support for split irqchip
In split irqchip mode, IOAPIC is working in user space, only update
kernel irq routes when entry changed. When IR is enabled, we directly
update the kernel with translated messages. It works just like a kernel
cache for the remapping entries.

Since KVM irqfd is using kernel gsi routes to deliver interrupts, as
long as we can support split irqchip, we will support irqfd as
well. Also, since kernel gsi routes will cache translated interrupts,
irqfd delivery will not suffer from any performance impact due to IR.

And, since we supported irqfd, vhost devices will be able to work
seamlessly with IR now. Logically this should contain both vhost-net and
vhost-user case.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[move trace-events lines into target-i386/trace-events]
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
c15fa0bea9 ioapic: introduce ioapic_entry_parse() helper
Abstract IOAPIC entry parsing logic into a helper function.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
cb135f59b8 q35: ioapic: add support for emulated IOAPIC IR
This patch translates all IOAPIC interrupts into MSI ones. One pseudo
ioapic address space is added to transfer the MSI message. By default,
it will be system memory address space. When IR is enabled, it will be
IOMMU address space.

Currently, only emulated IOAPIC is supported.

Idea suggested by Jan Kiszka and Rita Sinha in the following patch:

https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg01933.html

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Michael S. Tsirkin
09cd058a2c intel_iommu: get rid of {0} initializers
Correct and portable in theory, but triggers warnings with older gcc
versions when -Wmissing-braces is enabled.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:43 +03:00
Peter Maydell
7239247a2b Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2016-07-21-1' into staging
Merge qcrypto-next 2016/07/21 v1

# gpg: Signature made Thu 21 Jul 2016 11:07:36 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2016-07-21-1:
  crypto: don't open-code qcrypto_hash_supports
  crypto: use glib as fallback for hash algorithm
  crypto: use /dev/[u]random as a final fallback random source

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-21 11:48:49 +01:00
Peter Maydell
61ead113ae Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request

v2:
 * Resolved merge conflict with block/iscsi.c [Peter]

# gpg: Signature made Wed 20 Jul 2016 17:20:52 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request: (25 commits)
  raw_bsd: Convert to byte-based interface
  nbd: Convert to byte-based interface
  block: Kill .bdrv_co_discard()
  sheepdog: Switch .bdrv_co_discard() to byte-based
  raw_bsd: Switch .bdrv_co_discard() to byte-based
  qcow2: Switch .bdrv_co_discard() to byte-based
  nbd: Switch .bdrv_co_discard() to byte-based
  iscsi: Switch .bdrv_co_discard() to byte-based
  gluster: Switch .bdrv_co_discard() to byte-based
  blkreplay: Switch .bdrv_co_discard() to byte-based
  block: Add .bdrv_co_pdiscard() driver callback
  block: Convert .bdrv_aio_discard() to byte-based
  rbd: Switch rbd_start_aio() to byte-based
  raw-posix: Switch paio_submit() to byte-based
  block: Convert BB interface to byte-based discards
  block: Convert bdrv_aio_discard() to byte-based
  block: Switch BlockRequest to byte-based
  block: Convert bdrv_discard() to byte-based
  block: Convert bdrv_co_discard() to byte-based
  iscsi: Rely on block layer to break up large requests
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

Conflicts:
	block/gluster.c
2016-07-21 11:00:36 +01:00
Pranith Kumar
b95aae121b Revert e5dfc5e8e("Move README to markdown")
checkpatch.pl and other scripts fail without README. Revert
the rename for now; we may add README.md as a symlink later.

This reverts commit e5dfc5e8e7.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
Message-id: 20160720203131.30229-2-bobby.prani@gmail.com
[PMM: tweaked commit message a little]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-21 10:46:41 +01:00
Daniel P. Berrange
7603289712 crypto: don't open-code qcrypto_hash_supports
Call the existing qcrypto_hash_supports method from
qcrypto_hash_bytesv instead of open-coding it again.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-21 10:46:27 +01:00
Daniel P. Berrange
2165477c0f crypto: use glib as fallback for hash algorithm
GLib >= 2.16 provides GChecksum API which is good enough
for md5, sha1, sha256 and sha512. Use this as a final
fallback if neither nettle or gcrypt are available. This
lets us remove the stub hash impl, and so callers can
be sure those 4 algs are always available at compile
time. They may still be disabled at runtime, so a check
for qcrypto_hash_supports() is still best practice to
report good error messages.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-21 10:46:27 +01:00
Daniel P. Berrange
f3c8355c7a crypto: use /dev/[u]random as a final fallback random source
If neither gcrypt or gnutls are available to provide a
cryptographic random number generator, fallback to consuming
bytes directly from /dev/[u]random.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-21 10:46:27 +01:00
Peter Maydell
e66b05e9ca Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 queue, 2016-07-20

# gpg: Signature made Wed 20 Jul 2016 16:07:38 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request: (28 commits)
  pc: Make device_del CPU work for x86 CPUs
  target-i386: Add x86_cpu_unrealizefn()
  apic: Use apic_id as apic's migration instance_id
  (kvm)apic: Add unrealize callbacks
  apic: kvm-apic: Fix crash due to access to freed memory region
  apic: Drop APICCommonState.idx and use APIC ID as index in local_apics[]
  apic: move MAX_APICS check to 'apic' class
  pc: Implement query-hotpluggable-cpus callback
  pc: cpu: Allow device_add to be used with x86 cpu
  pc: Enforce adding CPUs contiguously and removing them in opposite order
  pc: Forbid BSP removal
  pc: Register created initial and hotpluged CPUs in one place pc_cpu_plug()
  pc: Delay setting number of boot CPUs to machine_done time
  pc: Set APIC ID based on socket/core/thread ids if it's not been set yet
  target-i386: Fix apic object leak when CPU is deleted
  target-i386: cpu: Do not ignore error and fix apic parent
  target-i386: Add support for UMIP and RDPID CPUID bits
  target-i386: Add socket/core/thread properties to X86CPU
  target-i386: Replace custom apic-id setter/getter with static property
  pc: cpu: Consolidate apic-id validity checks in pc_cpu_pre_plug()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 21:32:56 +01:00
Peter Maydell
3b55fbdcb0 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160720' into staging
Fixes for s390x in the css area.

# gpg: Signature made Wed 20 Jul 2016 15:12:43 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160720:
  s390x/css: provide a dev_path for css devices
  s390x/css: sch_handle_start_func() handles resume, too
  s390x/css: copy CCW format bit from ORB to SCSW

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 20:59:05 +01:00
Peter Maydell
6a426eb27e Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160720-1' into staging
usb: xhci assert fix, add usbredir streams property

# gpg: Signature made Wed 20 Jul 2016 12:32:09 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20160720-1:
  usbredir: add streams property
  xhci: Fix possible side effect from assert()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 20:31:31 +01:00
Peter Maydell
518cb31fa7 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20160720-1' into staging
qxl: fix qxl_set_dirty call in qxl_dirty_one_surface

# gpg: Signature made Wed 20 Jul 2016 12:28:01 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vga-20160720-1:
  qxl: fix qxl_set_dirty call in qxl_dirty_one_surface

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 19:41:20 +01:00
Peter Maydell
46ca418d9f Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Wed 20 Jul 2016 12:19:56 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  docker: pass EXECUTABLE to build script
  docker: Don't start a container that doesn't exist
  docker: Add "images" subcommand to docker.py
  docker: Fix exit code if $CMD failed
  docker: More sensible run script
  tests/docker/docker.py: add update operation
  tests/docker/dockerfiles: new debian-bootstrap.docker
  tests/docker/docker.py: check and run .pre script
  tests/docker/docker.py: support --include-executable
  tests/docker/docker.py: docker_dir outside build

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 18:52:10 +01:00
Peter Xu
651e4cefee intel_iommu: Add support for PCI MSI remap
This patch enables interrupt remapping for PCI devices.

To play the trick, one memory region "iommu_ir" is added as child region
of the original iommu memory region, covering range 0xfeeXXXXX (which is
the address range for APIC). All the writes to this range will be taken
as MSI, and translation is carried out only when IR is enabled.

Idea suggested by Paolo Bonzini.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:31:04 +03:00
Peter Xu
a4ca297e84 intel_iommu: add IR translation faults defines
Adding translation fault definitions for interrupt remapping. Please
refer to VT-d spec section 7.1.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1f91acee17 intel_iommu: define several structs for IOMMU IR
Several data structs are defined to better support the rest of the
patches: IRTE to parse remapping table entries, and IOAPIC/MSI related
structure bits to parse interrupt entries to be filled in by guest
kernel.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
80de52ba87 intel_iommu: handle interrupt remap enable
Handle writting to IRE bit in global command register.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
a58614391d intel_iommu: define interrupt remap table addr register
Defined Interrupt Remap Table Address register to store IR table
pointer. Also, do proper handling on global command register writes to
store table pointer and its size.

One more debug flag "DEBUG_IR" is added for interrupt remapping.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
cfc13df462 acpi: add DMAR scope definition for root IOAPIC
To enable interrupt remapping for intel IOMMU device, each IOAPIC device
in the system reported via ACPI MADT must be explicitly enumerated under
one specific remapping hardware unit. This patch adds the root-complex
IOAPIC into the default DMAR device.

Please refer to VT-d spec 8.3.1.1 for more information.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
d54bd7f80a intel_iommu: set IR bit for ECAP register
Enable IR in IOMMU Extended Capability register.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
b79104722f intel_iommu: allow queued invalidation for IR
Queued invalidation is required for IR. This patch add basic support for
interrupt cache invalidate requests. Since we currently have no IR cache
implemented yet, we can just skip all interrupt cache invalidation
requests for now.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
d46114f9ec acpi: enable INTR for DMAR report structure
In ACPI DMA remapping report structure, enable INTR flag when specified.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1121e0afdc x86-iommu: introduce "intremap" property
Adding one property for intel-iommu devices to specify whether we should
support interrupt remapping. By default, IR is disabled. To enable it,
we should use (take Intel IOMMU as example):

  -device intel_iommu,intremap=on

This property can be shared by Intel and future AMD IOMMUs.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1cf5fd573f x86-iommu: provide x86_iommu_get_default
Instead of searching the device tree every time, one static variable is
declared for the default system x86 IOMMU device.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
04af0e18bc intel_iommu: rename VTD_PCI_DEVFN_MAX to x86-iommu
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1c7955c450 x86-iommu: introduce parent class
Introducing parent class for intel-iommu devices named "x86-iommu". This
is preparation work to abstract shared functionalities out from Intel
and AMD IOMMUs. Currently, only the parent class is introduced. It does
nothing yet.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Marcel Apfelbaum
b1af7959a6 hw/versatile: realize the PCI root bus as part of the versatile init
'Realize' the PCI root bus manually since the 'realize' mechanism
does not propagate to child devices yet.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:27 +03:00
Marcel Apfelbaum
685f9a3428 hw/prep: realize the PCI root bus as part of the prep init
'Realize' the PCI root bus manually since the 'realize' mechanism
does not propagate to child devices yet.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
3c3c1e3203 hw/grackle: fix PCI bus initialization
Delay the host-bridge 'realization' until the
PCI root bus is attached.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
2f3ae0b2d4 hw/apb: fix PCI bus initialization
Create and connect the PCI root bus to the
host bridge before the later is 'realized'.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
a8c1a75343 hw/mips: fix PCI bus initialization
Delay the host-bridge 'realization' until the
PCI root bus is attached.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Leon Alrae <leon.alrae@imgtec.com>
Tested-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
50d3bba9da hw/alpha: fix PCI bus initialization
Delay the host-bridge 'realization' until the
PCI root bus is attached.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
eaf8d91cd7 tests/prom-env-test: increase the test timeout
On a slower machine the test can take more than 30 seconds.
Increase the timeout to 100 seconds.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Stefan Hajnoczi
cdcab9d941 nvdimm: fix memory leak in error code path
object_get_canonical_path_component() returns a heap-allocated string
that must be freed using g_free().

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:26 +03:00
Peter Maydell
e0ce97f896 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Wed 20 Jul 2016 01:18:39 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  block/gluster: add support for multiple gluster servers
  block/gluster: using new qapi schema
  block/gluster: deprecate rdma support
  block/gluster: code cleanup
  block/gluster: rename [server, volname, image] -> [host, volume, path]
  mirror: fix request throttling in drive-mirror
  mirror: improve performance of mirroring of empty disk
  mirror: efficiently zero out target
  mirror: optimize dirty bitmap filling in mirror_run a bit
  block: remove extra condition in bdrv_can_write_zeroes_with_unmap
  mirror: create mirror_dirty_init helper for mirror_run
  mirror: create mirror_throttle helper
  mirror: make sectors_in_flight int64_t
  dirty-bitmap: operate with int64_t amount

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 17:05:35 +01:00
Igor Mammedov
8fe6374e8e pc: Make device_del CPU work for x86 CPUs
ACPI subsystem already has all logic in place the only
thing left to eject CPU is destroy it and ammend
present CPUs counter in CMOS, do so.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:20 -03:00
Igor Mammedov
c884776e9d target-i386: Add x86_cpu_unrealizefn()
First remove VCPU from exec loop and only then remove lapic.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:20 -03:00
Igor Mammedov
f6e984443f apic: Use apic_id as apic's migration instance_id
instance_id is generated by last_used_id + 1 for a given device type
so for QEMU with 3 CPUs instance_id for APICs is a seti of [0, 1, 2]
When CPU in the middle is hot-removed and migration started
APICs with instance_ids 0 and 2 are transferred in migration stream.
However target starts with 2 CPUs and APICs' instance_ids are
generated from scratch [0, 1] hence migration fails with error
  Unknown savevm section or instance 'apic' 2

Fix issue by manually registering APIC's vmsd with apic_id as
instance_id, in this case instance_id on target will always
match instance_id on source as apic_id is the same for a given
cpu instance.

Reported-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
9c156f9de5 (kvm)apic: Add unrealize callbacks
Callbacks will do necessary cleanups before APIC device is deleted

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
365aa1131f apic: kvm-apic: Fix crash due to access to freed memory region
kvm-apic.io_memory memory region had its parent set to NULL at
memory_region_init_io() time, so it ended up as a child in
 /unattached contaner.
As result when kvm-apic instance was deleted, the child property
 /unattached/kvm-apic-msi[XXX] contained a reference to
kvm-apic.io_memory address which was freed as part of kvm-apic.

Do the same as 'apic' and make kvm-apic instance the owner
of the memory region so that it won't end up in /unattached
and gets cleanly released along with related kvm-apic instance.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
1dfe3282cf apic: Drop APICCommonState.idx and use APIC ID as index in local_apics[]
local_apics[] is sized to contain all APIC ID supported in xAPIC mode,
so use APIC ID as index in it instead of constantly increasing counter idx.

Fixes error "apic initialization failed" when a CPU hotplugged and
unplugged more times than there are free slots in local_apics[].

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
889211b18b apic: move MAX_APICS check to 'apic' class
MAX_APICS is only used by child 'apic' class and not
by its parent TYPE_APIC_COMMON or any other derived
class.

Move check into end user 'apic' class so it won't
get in the way of other APIC implementations
if they support more then MAX_APICS.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
4d952914a0 pc: Implement query-hotpluggable-cpus callback
it returns a list of present/possible to hotplug CPU
objects with a list of properties to use with
device_add.

in PC case returned list would looks like:
-> { "execute": "query-hotpluggable-cpus" }
<- {"return": [
     {
        "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
        "props": {"core-id": 0, "socket-id": 1, "thread-id": 0}
     },
     {
        "qom-path": "/machine/unattached/device[0]",
        "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
        "props": {"core-id": 0, "socket-id": 0, "thread-id": 0}
     }
   ]}

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
edd1211194 pc: cpu: Allow device_add to be used with x86 cpu
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
4da7faaeb0 pc: Enforce adding CPUs contiguously and removing them in opposite order
It will still allow us to use cpu_index as migration instance_id
since when CPUs are added contiguously (from the first to the last)
and removed in opposite order, cpu_index stays stable and it's
reproducible on destination side.

While there is work in progress to support migration when there
are holes in cpu_index range resulting from out-of-order plug or
unplug, this patch is intended as an interim solution until
cpu_index usage is cleaned up.

As result of this patch it would be possible to plug/unplug CPUs,
but in limited order that doesn't break migration.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
73360e2785 pc: Forbid BSP removal
Boot CPU is assumed to always present in QEMU code, so
untile that assumptions are gone, deny removal request,
In another words QEMU won't support BSP hot-unplug.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
a44a49dbf2 pc: Register created initial and hotpluged CPUs in one place pc_cpu_plug()
Consolidate possible_cpus array management in pc_cpu_plug() for
smp_cpus, coldplugged with -device and hotplugged with
device_add.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
ba157b696c pc: Delay setting number of boot CPUs to machine_done time
Currently present CPUs counter in CMOS only contains
smp_cpus (i.e. initial CPUs specified with -smp X) and
doesn't account for CPUs created with -device.
If VM is started with additional CPUs added with
 -device, it will hang in BIOS waiting for condition
   smp_cpus == counted_cpus
forever as counted_cpus will include -device CPUs as well
and be more than smp_cpus.

Make present CPUs counter in CMOS to count all CPUs
(initial and coldplugged with -device) by delaying
it to machine done time when it possible to count
CPUs added with -device.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
e8f7b83e88 pc: Set APIC ID based on socket/core/thread ids if it's not been set yet
CPU added with device_add help won't have APIC ID set,
so set it according to socket/core/thread ids provided
with device_add command.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
67e55caa6d target-i386: Fix apic object leak when CPU is deleted
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
6816b1b381 target-i386: cpu: Do not ignore error and fix apic parent
object_property_add_child() silently fails with error that it can't
create duplicate propery 'apic' as we already have 'apic' property
registered for 'apic' feature. As result generic device_realize puts
apic into unattached container.

As it's programming error, abort if name collision happens in future
and fix property name for apic_state to 'lapic', this way apic is
a child of cpu instance.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Paolo Bonzini
c2f193b538 target-i386: Add support for UMIP and RDPID CPUID bits
These are both stored in CPUID[EAX=7,EBX=0].ECX.  KVM is going to
be able to emulate both (albeit with a performance loss in the case
of RDPID, which therefore will be in KVM_GET_EMULATED_CPUID rather
than KVM_GET_SUPPORTED_CPUID).

It's also possible to implement both in TCG, but this is for 2.8.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
d89c2b8b98 target-i386: Add socket/core/thread properties to X86CPU
These properties will be used by as address where to plug
CPU with help -device/device_add commands.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
2da00e3176 target-i386: Replace custom apic-id setter/getter with static property
Custom apic-id setter/getter doesn't do any property specific
checks anymore, so clean it up and use more compact static
property DEFINE_PROP_UINT32 instead.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
4ec60c76d5 pc: cpu: Consolidate apic-id validity checks in pc_cpu_pre_plug()
Machine code knows about all possible APIC IDs so use that
instead of hack which does O(n^2) complexity duplicate
checks, interating over global CPUs list.
As result duplicate check is done only once with O(log n) complexity.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:17 -03:00
Igor Mammedov
7baef5cfea pc: Extract CPU lookup into a separate function
It will be reused in the next patch at pre_plug time

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Dr. David Alan Gilbert
11f6fee576 target-i386: Set physical address bits based on host
Add the host-phys-bits boolean property, if true, take phys-bits
from the hosts physical bits value, overriding either the default
or the user specified value.

We can also use the value we read from the host to check the users
explicitly set value and warn them if it doesn't match.

Note:
   a) We only read the hosts value in KVM mode (because on non-x86
      we get an abort if we try)
   b) We don't warn about trying to use host-phys-bits in TCG mode,
      we just fall back to the TCG default.  This allows the machine
      type to set the host-phys-bits flag if it wants and then to
      work in both TCG and KVM.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Igor Mammedov
9f3aab5853 pc: Add x86_topo_ids_from_apicid()
It's reverse of apicid_from_topo_ids() and will be used in follow up
patches to fill in data structures for query-hotpluggable-cpus and
for user friendly error reporting.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Igor Mammedov
d9c84f1969 target-i386: Use uint32_t for X86CPU.apic_id
Redo 9886e834 (target-i386: Require APIC ID to be explicitly set before
CPU realize) in another way that doesn't use int64_t to detect
if apic-id property has been set.

Use the fact that 0xFFFFFFFF is the broadcast
value that a CPU can't have and set default
uint32_t apic_id to it instead of using int64_t.

Later uint32_t apic_id will be used to drop custom
property setter/getter in favor of static property.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Dr. David Alan Gilbert
fcc35e7cca target-i386: Fill high bits of mtrr mask
Fill the bits between 51..number-of-physical-address-bits in the
MTRR_PHYSMASKn variable range mtrr masks so that they're consistent
in the migration stream irrespective of the physical address space
of the source VM in a migration.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Dr. David Alan Gilbert
112dad69d7 target-i386: Mask mtrr mask based on CPU physical address limits
The CPU GPs if we try and set a bit in a variable MTRR mask above
the limit of physical address bits on the host.  We hit this
when loading a migration from a host with a larger physical
address limit than our destination (e.g. a Xeon->i7 of same
generation) but previously used to get away with it
until 48e1a45 started checking that msr writes actually worked.

It seems in our case the GP probably comes from KVM emulating
that GP.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:43 -03:00
Dr. David Alan Gilbert
af45907a13 target-i386: Allow physical address bits to be set
Currently QEMU sets the x86 number of physical address bits to the
magic number 40.  This is only correct on some small AMD systems;
Intel systems tend to have 36, 39, 46 bits, and large AMD systems
tend to have 48.

Having the value different from your actual hardware is detectable
by the guest and in principal can cause problems;
The current limit of 40 stops TB VMs being created by those lucky
enough to have that much.

This patch lets you set the physical bits by a cpu property but
defaults to the same 40bits which matches TCGs setup.

I've removed the ancient warning about the 42 bit limit in exec.c;
I can't find that limit in there and no one else seems to know where
it is.

We use a magic value of 0 as the property default so that we can
later distinguish between the default and a user set value.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:43 -03:00
Dr. David Alan Gilbert
709787ee99 target-i386: Provide TCG_PHYS_ADDR_BITS
Provide a constant for the number of address bits supported under TCG.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:43 -03:00
Cornelia Huck
2a79eb1a61 s390x/css: provide a dev_path for css devices
We need to implement the get_dev_path method for the css bus, or
else we might end up with two different devices having the same
qdev_path.

This was noticed when adding two scsi_hd controllers: The SCSIBus
code will produce a non-unique dev_path for vmstate usage if the
parent bus does not provide the get_dev_path method.

We simply use the device's bus id, as this is unique and we won't
have any deeper hierarchy from a channel subsystem perspective
anyway.

Note that we need to disable this for older machine versions,
as this changes the migration format.

Reported-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Tested-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-20 15:47:25 +02:00
Sascha Silbe
727a0424dd s390x/css: sch_handle_start_func() handles resume, too
It's not obvious from the code flow that sch_handle_start_func() gets
called for rsch. Add some comments explaining this.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-20 15:47:25 +02:00
Sascha Silbe
485dd69088 s390x/css: copy CCW format bit from ORB to SCSW
The CCW Format (F) flag of the Subchannel-Status Word (SCSW) indicates
the format of the CCWs "associated with an I/O operation", i.e. the
value of CCW-Format Control (F) bit of the Operation-Request Block
(ORB).

Copy the CCW format bit from the ORB to the SCSW so we correctly
indicate the format of the CCWs to the guest.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-20 15:47:25 +02:00
Peter Maydell
3b2e6798ff Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2016-07-19' into staging
QAPI patches for 2016-07-19

# gpg: Signature made Tue 19 Jul 2016 19:35:27 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2016-07-19:
  net: Use correct type for bool flag
  qapi: Change Netdev into a flat union
  block: Simplify drive-mirror
  block: Simplify block_set_io_throttle
  qapi: Implement boxed types for commands/events
  qapi: Plumb in 'boxed' to qapi generator lower levels
  qapi-event: Simplify visit of non-implicit data
  qapi: Drop useless gen_err_check()
  qapi: Add type.is_empty() helper
  qapi: Hide tag_name data member of variants
  qapi: Special case c_name() for empty type
  qapi: Require all branches of flat union enum to be covered
  net: use Netdev instead of NetClientOptions in client init
  qapi: change QmpInputVisitor to QSLIST
  qapi: change QmpOutputVisitor to QSLIST

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 14:34:08 +01:00
Eric Blake
decaeed773 raw_bsd: Convert to byte-based interface
Since the raw format driver is just passing things through, we can
do byte-based read and write if the underlying protocol does
likewise.

There's one tricky part - if we probed the image format, we document
that we restrict operations on the initial sector.  It's easiest to
keep this guarantee by enforcing read-modify-write on sub-sector
operations (yes, this partially reverts commit ad82be2f).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468624988-423-20-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
70c4fb2648 nbd: Convert to byte-based interface
The NBD protocol doesn't have any notion of sectors, so it is
a fairly easy conversion to use byte-based read and write.

Signed-off-by: Eric Blake <eblake@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468624988-423-19-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
02aefe43cb block: Kill .bdrv_co_discard()
Now that all drivers have a byte-based .bdrv_co_pdiscard(), we
no longer need to worry about the sector-based version.  We can
also relax our minimum alignment to 1 for drivers that support it.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-18-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
dde4753763 sheepdog: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-17-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
5f61ad079a raw_bsd: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-16-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
82e8a7888b qcow2: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-15-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
447e57c3b0 nbd: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

While at it, call directly into nbd-client.c instead of having
a pointless trivial wrapper in nbd.c.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-14-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
97c7e85cfe iscsi: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Unlike write_zeroes, where we can be handed unaligned requests
and must fail gracefully with -ENOTSUP for a fallback, we are
guaranteed that discard requests are always aligned because the
block layer already ignored unaligned head/tail.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-13-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
1014170b82 gluster: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-12-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
aba76e2f03 blkreplay: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-11-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
47a5486d59 block: Add .bdrv_co_pdiscard() driver callback
There's enough drivers with a sector-based callback that it will
be easier to switch one at a time.  This patch adds a byte-based
callback, and then after all drivers are swapped, we'll drop the
sector-based callback.

[checkpatch doesn't like the space after coroutine_fn in
block_int.h, but it's consistent with the rest of the file]

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-10-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
4da444a0bb block: Convert .bdrv_aio_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based driver callback .bdrv_aio_discard() with a new
byte-based .bdrv_aio_pdiscard().  Only raw-posix and RBD drivers
are affected, so it was not worth splitting into multiple patches.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-9-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
7bbca9e290 rbd: Switch rbd_start_aio() to byte-based
The internal function converts to byte-based before calling into
RBD code; hoist the conversion to the callers so that callers
can then be switched to byte-based themselves.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-8-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
36e3b2e733 raw-posix: Switch paio_submit() to byte-based
The only remaining uses of paio_submit() were flush (with no
offset or count) and discard (which we are switching to byte-based);
furthermore, the similarly named paio_submit_co() is already
byte-based.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-7-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
1c6c4bb7f0 block: Convert BB interface to byte-based discards
Change sector-based blk_discard(), blk_co_discard(), and
blk_aio_discard() to instead be byte-based blk_pdiscard(),
blk_co_pdiscard(), and blk_aio_pdiscard().  NBD gets a lot
simpler now that ignoring the unaligned portion of a
byte-based discard request is handled under the hood by
the block layer.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-6-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
60ebac16bc block: Convert bdrv_aio_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based bdrv_aio_discard() with a new byte-based
bdrv_aio_pdiscard(), which silently ignores any unaligned head
or tail.  Driver callbacks will be converted in followup patches.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468624988-423-5-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
b15404e027 block: Switch BlockRequest to byte-based
BlockRequest is the internal struct used by bdrv_aio_*.  At the
moment, all such calls were sector-based, but we will eventually
convert to byte-based; start by changing the internal variables
to be byte-based.  No change to behavior, although the read and
write code can now go byte-based through more of the stack.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468624988-423-4-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
0c51a893b6 block: Convert bdrv_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based bdrv_discard() with a new byte-based
bdrv_pdiscard(), which silently ignores any unaligned head
or tail.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-3-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
9f1963b3f7 block: Convert bdrv_co_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based bdrv_co_discard() with a new byte-based
bdrv_co_pdiscard(), which silently ignores any unaligned head
or tail.  Driver callbacks will be converted in followup patches.

By calculating the alignment outside of the loop, and clamping
the max discard to an aligned value, we can simplify the actions
done within the loop.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-2-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
6bd01f14db iscsi: Rely on block layer to break up large requests
Now that the block layer honors max_request, we don't need to
bother with an EINVAL on overlarge requests, but can instead
assert that requests are well-behaved.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468607524-19021-7-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
1e2a77a851 nbd: Drop unused offset parameter
Now that NBD relies on the block layer to fragment things, we no
longer need to track an offset argument for which fragment of
a request we are actually servicing.

While at it, use true and false instead of 0 and 1 for a bool
parameter.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468607524-19021-6-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
fb1a6de14a nbd: Rely on block layer to break up large requests
Now that the block layer will honor max_transfer, we can simplify
our code to rely on that guarantee.

The readv code can call directly into nbd-client, just as the
writev code has done since commit 52a4650.

Interestingly enough, while qemu-io 'w 0 40m' splits into a 32M
and 8M transaction, 'w -z 0 40m' splits into two 16M and an 8M,
because the block layer caps the bounce buffer for writing zeroes
at 16M.  When we later introduce support for NBD_CMD_WRITE_ZEROES,
we can get a full 32M zero write (or larger, if the client and
server negotiate that write zeroes can use a larger size than
ordinary writes).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468607524-19021-5-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
04ed95f484 block: Fragment writes to max transfer length
Drivers should be able to rely on the block layer honoring the
max transfer length, rather than needing to return -EINVAL
(iscsi) or manually fragment things (nbd).  We already fragment
write zeroes at the block layer; this patch adds the fragmentation
for normal writes, after requests have been aligned (fragmenting
before alignment would lead to multiple unaligned requests, rather
than just the head and tail).

When fragmenting a large request where FUA was requested, but
where we know that FUA is implemented by flushing all requests
rather than the given request, then we can still get by with
only one flush.  Note, however, that we need a followup patch
to the raw format driver to avoid a regression in the number of
flushes actually issued.

The return value was previously nebulous on success (sometimes
zero, sometimes the length written); since we never have a short
write, and since fragmenting may store yet another positive
value in 'ret', change the function to always return 0 on success,
matching what we do in bdrv_aligned_preadv().

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468607524-19021-4-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
8a39b4d6e2 raw_bsd: Don't advertise flags not supported by protocol layer
The raw format layer supports all flags via passthrough - but
it only makes sense to pass through flags that the lower layer
actually supports.

The next patch gives stronger reasoning for why this is correct.
At the moment, the raw format layer ignores the max_transfer
limit of its protocol layer, and an attempt to do the qemu-io
'w -f 0 40m' to an NBD server that lacks FUA will pass the entire
40m request to the NBD driver, which then fragments the request
itself into a 32m write, 8m write, and flush.  But once the block
layer starts honoring limits and fragmenting packets, the raw
driver will hand the NBD driver two separate requests; if both
requests have BDRV_REQ_FUA set, then this would result in a 32m
write, flush, 8m write, and second flush.  By having the raw
layer no longer advertise FUA support when the protocol layer
lacks it, we are back to a single flush at the block layer for
the overall 40m request.

Note that 'w -f -z 0 40m' does not currently exhibit the same
problem, because there, the fragmentation does not occur until
at the NBD layer (the raw layer has .bdrv_co_pwrite_zeroes, and
the NBD layer doesn't advertise max_pwrite_zeroes to constrain
things at the raw layer) - but the problem is latent and we
would again have too many flushes without this patch once the
NBD layer implements support for the new NBD_CMD_WRITE_ZEROES
command, if it sets max_pwrite_zeroes to the same 32m limit as
recommended by the NBD protocol.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468607524-19021-3-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
1a62d0accd block: Fragment reads to max transfer length
Drivers should be able to rely on the block layer honoring the
max transfer length, rather than needing to return -EINVAL
(iscsi) or manually fragment things (nbd).  This patch adds
the fragmentation in the block layer, after requests have been
aligned (fragmenting before alignment would lead to multiple
unaligned requests, rather than just the head and tail).

The return value was previously nebulous on success on whether
it was zero or the length read; and fragmenting may introduce
yet other non-zero values if we use the last length read.  But
as at least some callers are sloppy and expect only zero on
success, it is easiest to just guarantee 0.

[Fix uninitialized ret local variable in bdrv_aligned_preadv().
--Stefan]

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468607524-19021-2-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Peter Maydell
338404d061 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160719' into staging
target-arm queue:
 * fix two minor Coverity complaints

# gpg: Signature made Tue 19 Jul 2016 18:02:34 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20160719:
  arm_gicv3: Add assert()s to tell Coverity that offsets are aligned
  target-arm: Fix unreachable code in gicv3_class_name()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 12:48:18 +01:00
Gerd Hoffmann
87ae924b73 usbredir: add streams property
Enabled by default, can be used to turn off (usb3) streams support.
xhci has a such a property too (same name, same default).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1468408474-17648-1-git-send-email-kraxel@redhat.com
2016-07-20 13:31:20 +02:00
Alexey Kardashevskiy
f81bb347ef xhci: Fix possible side effect from assert()
A static analysis tool called BEAM detected possible side effect from
assert() calling a helper which may change an XHCI ring after every call.

This moves xhci_ring_fetch() out of assert() so it will be called
with and without enabled debug.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-id: 1468812548-31868-1-git-send-email-aik@ozlabs.ru
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-20 13:31:09 +02:00
Alex Bennée
b7c851b2b8 docker: pass EXECUTABLE to build script
To build a docker image with which needs qemu linux-user emulation we
need to pass --include-executable to the build script. Using the same
mechanism as for other container controls we enable the option is
EXECUTABLE is set on the make command line e.g:

    make docker-image-debian-bootstrap V=1 J=9 DEB_ARCH=armhf \
        DEB_TYPE=stable EXECUTABLE=./arm-linux-user/qemu-arm

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-11-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Fam Zheng
ff31e2256d docker: Don't start a container that doesn't exist
Image building targets are dependencies of test running targets, so when
a docker image doesn't exist, it means it's skipped (due to dependency
checks in pre script). Therefore, skip the test too.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-10-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Fam Zheng
4b08af6019 docker: Add "images" subcommand to docker.py
This is a wrapper for the 'docker images' command.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-9-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Fam Zheng
1ad76b8af8 docker: Fix exit code if $CMD failed
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-8-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Fam Zheng
c81585130e docker: More sensible run script
It is very easy to figure out current directory and bash option from the
execution, so do less in the Makefile invocation command line, and
figure both options in the script.

This makes the next patch easier.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-7-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Alex Bennée
6e733da676 tests/docker/docker.py: add update operation
This adds a new operation to the docker script to allow updating of
binaries in an existing container. This is because it would be
inefficient to re-build the whole container just for an update to the
QEMU binary.

To update the executable run:

    ./tests/docker/docker.py update \
        debian:armhf ./arm-linux-user/qemu-arm

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-6-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
95c975013a tests/docker/dockerfiles: new debian-bootstrap.docker
Together with the debian-bootstrap.pre script can now build an arbitrary
architecture of Debian using debootstrap. This allows debootstrap to set
up its first stage before the container is built.

To build a container you need a command line like:

  DEB_ARCH=armhf DEB_TYPE=testing \
    ./tests/docker/docker.py build \
    --include-executable=arm-linux-user/qemu-arm debian:armhf \
    ./tests/docker/dockerfiles/debian-bootstrap.docker

Although a number of non-debian systems package the debootstrap script
it is fairly portable in itself. Assuming we have some sort of fakeroot
implementation we can just clone the upstream repository and use the
script from there.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-5-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
920776ea5e tests/docker/docker.py: check and run .pre script
The docker script will now search for an associated $dockerfile.pre
script which gets run in the same build context as the dockerfile will
be. This is to support pre-seeding the build context before running the
docker build.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-4-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
504ca3c208 tests/docker/docker.py: support --include-executable
When passed the path to a binary we copy it and any linked libraries (if
it is dynamically linked) into the docker build context. These can then
be included by a dockerfile with the line:

  # Copy all of context into container
  ADD . /

This is mainly intended for setting up foreign architecture docker
images which use qemu-$arch to do cross-architecture linux-user
execution. It also relies on the host and guest file-system following
reasonable multi-arch layouts so the copied libraries don't clash with
the guest ones.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-3-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
a9f8d03891 tests/docker/docker.py: docker_dir outside build
Instead of letting the build_image create the temporary working dir we
move the creation to the build command. This is preparation for the
later patches where additional files can be added to the build context
before the build step is run.

We also ensure we remove the build context after we are done (mkdtemp
doesn't do this automatically for you).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-2-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Peter Maydell
1ecfb24da9 Merge remote-tracking branch 'remotes/riku/tags/pull-linux-user-20160719-2' into staging
linux-user fixes before 2.7 freeze, fix commit message

# gpg: Signature made Tue 19 Jul 2016 14:18:54 BST
# gpg:                using RSA key 0xB44890DEDE3C9BC0
# gpg: Good signature from "Riku Voipio <riku.voipio@iki.fi>"
# gpg:                 aka "Riku Voipio <riku.voipio@linaro.org>"
# Primary key fingerprint: FF82 03C8 C391 98AE 0581  41EF B448 90DE DE3C 9BC0

* remotes/riku/tags/pull-linux-user-20160719-2:
  linux-user: AArch64 has sync_file_range, not sync_file_range2
  linux-user: Fix type for SIOCATMARK ioctl
  linux-user: define missing sparc syscalls
  linux-user: Fix terminal control ioctls
  linux-user: Add some new blk ioctls
  linux-user: Handle short lengths in host_to_target_sockaddr()
  linux-user: Forget about synchronous signal once it is delivered
  linux-user: Correct type for LOOP_GET_STATUS{,64} ioctls
  linux-user: Correct type for BLKSSZGET
  linux-user: Add loop control ioctls
  linux-user: Check sigsetsize argument to syscalls
  linux-user: add nested netlink types
  linux-user: convert sockaddr_ll from host to target
  linux-user: add fd_trans helper in do_recvfrom()
  linux-user: fix netlink memory corruption
  linux-user: fd_trans_*_data() returns the length

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 11:32:12 +01:00
Prasanna Kumar Kalever
6c7189bb29 block/gluster: add support for multiple gluster servers
This patch adds a way to specify multiple volfile servers to the gluster
block backend of QEMU with tcp|rdma transport types and their port numbers.

Problem:

Currently VM Image on gluster volume is specified like this:

file=gluster[+tcp]://host[:port]/testvol/a.img

Say we have three hosts in a trusted pool with replica 3 volume in action.
When the host mentioned in the command above goes down for some reason,
the other two hosts are still available. But there's currently no way
to tell QEMU about them.

Solution:

New way of specifying VM Image on gluster volume with volfile servers:
(We still support old syntax to maintain backward compatibility)

Basic command line syntax looks like:

Pattern I:
 -drive driver=gluster,
        volume=testvol,path=/path/a.raw,[debug=N,]
        server.0.type=tcp,
        server.0.host=1.2.3.4,
        server.0.port=24007,
        server.1.type=unix,
        server.1.socket=/path/socketfile

Pattern II:
 'json:{"driver":"qcow2","file":{"driver":"gluster",
       "volume":"testvol","path":"/path/a.qcow2",["debug":N,]
       "server":[{hostinfo_1}, ...{hostinfo_N}]}}'

   driver      => 'gluster' (protocol name)
   volume      => name of gluster volume where our VM image resides
   path        => absolute path of image in gluster volume
  [debug]      => libgfapi loglevel [(0 - 9) default 4 -> Error]

  {hostinfo}   => {{type:"tcp",host:"1.2.3.4"[,port=24007]},
                   {type:"unix",socket:"/path/sockfile"}}

   type        => transport type used to connect to gluster management daemon,
                  it can be tcp|unix
   host        => host address (hostname/ipv4/ipv6 addresses/socket path)
   port        => port number on which glusterd is listening.
   socket      => path to socket file

Examples:
1.
 -drive driver=qcow2,file.driver=gluster,
        file.volume=testvol,file.path=/path/a.qcow2,file.debug=9,
        file.server.0.type=tcp,
        file.server.0.host=1.2.3.4,
        file.server.0.port=24007,
        file.server.1.type=unix,
        file.server.1.socket=/var/run/glusterd.socket
2.
  'json:{"driver":"qcow2","file":{"driver":"gluster","volume":"testvol",
         "path":"/path/a.qcow2","debug":9,"server":
         [{"type":"tcp","host":"1.2.3.4","port":"24007"},
          {"type":"unix","socket":"/var/run/glusterd.socket"}
         ]}}'

This patch gives a mechanism to provide all the server addresses, which are in
replica set, so in case host1 is down VM can still boot from any of the
active hosts.

This is equivalent to the backup-volfile-servers option supported by
mount.glusterfs (FUSE way of mounting gluster volume)

credits: sincere thanks to all the supporters

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1468947453-5433-6-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:38:50 -04:00
Prasanna Kumar Kalever
7edac2ddeb block/gluster: using new qapi schema
this patch adds 'GlusterServer' related schema in qapi/block-core.json

[Jeff: minor fix-ups of comments and formatting, per patch reviews]

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468947453-5433-5-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:36:11 -04:00
Prasanna Kumar Kalever
0552ff2465 block/gluster: deprecate rdma support
gluster volfile server fetch happens through unix and/or tcp, it doesn't
support volfile fetch over rdma. The rdma code may actually mislead,
so to make sure things do not break, for now we fallback to tcp when requested
for rdma, with a warning.

If you are wondering how this worked all these days, its the gluster libgfapi
code which handles anything other than unix transport as socket/tcp, sad but
true.

Also gluster doesn't support ipv6 addresses, removing the ipv6 related
comments/docs section

[Jeff: Minor grammatical fixes in comments and commit message, per
review comments]

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-4-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:20:44 -04:00
Prasanna Kumar Kalever
f70c50c817 block/gluster: code cleanup
unified coding styles of multiline function arguments and other error functions
moved random declarations of structures and other list variables

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-3-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:08:12 -04:00
Prasanna Kumar Kalever
d5cf4079ca block/gluster: rename [server, volname, image] -> [host, volume, path]
A future patch will add support for multiple gluster servers. Existing
terminology is a bit unusual in relation to what names are used by
other networked devices, and doesn't map very well to the terminology
we expect to use for multiple servers.  Therefore, rename the following
options:
'server'  -> 'host'
'image'   -> 'path'
'volname' -> 'volume'

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-2-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:08:12 -04:00
Denis V. Lunev
cf56a3c632 mirror: fix request throttling in drive-mirror
There are 2 deficiencies here:
- mirror_iteration could start several requests inside. Thus we could
  simply have more in_flight requests than MAX_IN_FLIGHT.
- keeping this in mind throttling in mirror_run which is checking
  s->in_flight == MAX_IN_FLIGHT is wrong.

The patch adds the check and throttling into mirror_iteration and fixes
the check in mirror_run() to be sure.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 1466598927-5990-1-git-send-email-den@openvz.org
CC: Jeff Cody <jcody@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit e648dc95c28fbca12e67be26a1fc4b9a0676c3fe)
2016-07-19 17:03:44 -04:00
Denis V. Lunev
4b5004d9fc mirror: improve performance of mirroring of empty disk
We should not take into account zero blocks for delay calculations.
They are not read and thus IO throttling is not required. In the
other case VM migration with 16 Tb QCOW2 disk with 4 Gb of data takes
days.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-9-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:02:49 -04:00
Denis V. Lunev
c7c2769c0e mirror: efficiently zero out target
With a bdrv_co_write_zeroes method on a target BDS and when this method
is working as indicated by the bdrv_can_write_zeroes_with_unmap(), zeroes
will not be placed into the wire. Thus the target could be very efficiently
zeroed out. This should be done with the largest chunk possible.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-8-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
b7d5062c9c mirror: optimize dirty bitmap filling in mirror_run a bit
There is no need to scan allocation tables if we have mark_all_dirty flag
set. Just mark it all dirty.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-7-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
2f0342efdb block: remove extra condition in bdrv_can_write_zeroes_with_unmap
All .bdrv_co_write_zeroes callbacks nowadays work perfectly even
with backing store attached. If future new callbacks would be unable to do
that - they have a chance to block this in bdrv_get_info().

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-6-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
c0b363ad43 mirror: create mirror_dirty_init helper for mirror_run
The code inside the helper will be extended in the next patch. mirror_run
itself is overbloated at the moment.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-5-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
49efb1f5b0 mirror: create mirror_throttle helper
The patch also places last_pause_ns from stack in mirror_run into
MirrorBlockJob structure. This helper will be useful in next patches.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468503209-19498-4-git-send-email-den@openvz.org
CC: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
CC: Eric Blake <eblake@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
531509ba28 mirror: make sectors_in_flight int64_t
We keep here the sum of int fields. Thus this could easily overflow,
especially when we will start sending big requests in next patches.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-3-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
6d07859926 dirty-bitmap: operate with int64_t amount
Underlying HBitmap operates even with uint64_t. Thus this change is safe.
This would be useful f.e. to mark entire bitmap dirty in one call.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-2-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Eric Blake
0e55c381f6 net: Use correct type for bool flag
is_netdev is only used as a bool, so make it one.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-14-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 20:18:27 +02:00
Eric Blake
f394b2e20d qapi: Change Netdev into a flat union
This is a mostly-mechanical conversion that creates a new flat
union 'Netdev' QAPI type that covers all the branches of the
former 'NetClientOptions' simple union, where the branches are
now listed in a new 'NetClientDriver' enum rather than generated
from the simple union.  The existence of a flat union has no
change to the command line syntax accepted for new code, and
will make it possible for a future patch to switch the QMP
command to parse a boxed union for no change to valid QMP; but
it does have some ripple effect on the C code when dealing with
the new types.

While making the conversion, note that the 'NetLegacy' type
remains unchanged: it applies only to legacy command line options,
and will not be ported to QMP, so it should remain a wrapper
around a simple union; to avoid confusion, the type named
'NetClientOptions' is now gone, and we introduce 'NetLegacyOptions'
in its place.  Then, in the C code, we convert from NetLegacy to
Netdev as soon as possible, so that the bulk of the net stack
only has to deal with one QAPI type, not two.  Note that since
the old legacy code always rejected 'hubport', we can just omit
that branch from the new 'NetLegacyOptions' simple union.

Based on an idea originally by Zoltán Kővágó <DirtY.iCE.hu@gmail.com>:
Message-Id: <01a527fbf1a5de880091f98cf011616a78adeeee.1441627176.git.DirtY.iCE.hu@gmail.com>
although the sed script in that patch no longer applies due to
other changes in the tree since then, and I also did some manual
cleanups (such as fixing whitespace to keep checkpatch happy).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-13-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Fixup from Eric squashed in]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 20:18:02 +02:00
Peter Maydell
acd8279621 arm_gicv3: Add assert()s to tell Coverity that offsets are aligned
Coverity complains that the GICR_IPRIORITYR case in gicv3_readl()
can overflow an array, because it doesn't know that the offsets
passed to that function must be word aligned. Add some assert()s
which hopefully tell Coverity that this isn't possible.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1468261372-17508-1-git-send-email-peter.maydell@linaro.org
2016-07-19 17:56:27 +01:00
Peter Maydell
ff9d3e9cd9 target-arm: Fix unreachable code in gicv3_class_name()
Coverity complains that the exit() in gicv3_class_name()
can be unreachable, because if TARGET_AARCH64 is defined
then all code paths return before reaching it. Move the
exit() up to the error_report() that it belongs with.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1468260552-8400-1-git-send-email-peter.maydell@linaro.org
2016-07-19 17:56:27 +01:00
Peter Maydell
c4e1cbd437 linux-user: AArch64 has sync_file_range, not sync_file_range2
The AArch64 Linux ABI syscall 84 is sync_file_range, not
sync_file_range2 (in the kernel it uses the asm-generic
headers and does not define __ARCH_WANT_SYNC_FILE_RANGE2).
Update our TARGET_NR_* definitions accordingly.

This fixes the sync_file_range syscall which otherwise
gets its arguments in the wrong order.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:18:11 +03:00
Peter Maydell
a57f1f8f52 linux-user: Fix type for SIOCATMARK ioctl
The SIOCATMARK ioctl takes an argument which should be a
pointer to an integer where the kernel will write the result.
We were incorrectly declaring it as TYPE_NULL which would mean
it would always fail (with EFAULT) when it should succeed.
Correct the type.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:18:11 +03:00
Laurent Vivier
74642d091a linux-user: define missing sparc syscalls
NR_lookup_dcookie, NR_fadvise64, NR_fadvise64_64

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:18:11 +03:00
Timothy Pearson
5b7f7bb39e linux-user: Fix terminal control ioctls
TIOCGPTN and related terminal control ioctls were not converted to the guest ioctl format on x86_64 targets. Convert these ioctls to enable terminal functionality on x86_64 guests.

Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:16:17 +03:00
Peter Maydell
4715856a68 linux-user: Add some new blk ioctls
Add some new blk ioctls (these are 0x12,119 through
to 0x12,127). Several of these are used by mke2fs; this silences
the warnings:

mke2fs 1.42.12 (29-Aug-2014)
Unsupported ioctl: cmd=0x127b
Unsupported ioctl: cmd=0x127a
warning: Unable to get device geometry for /dev/loop5
Unsupported ioctl: cmd=0x127c
Unsupported ioctl: cmd=0x127c
Unsupported ioctl: cmd=0x1277

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:17 +03:00
Peter Maydell
a1e221929f linux-user: Handle short lengths in host_to_target_sockaddr()
If userspace specifies a short buffer for a target sockaddr,
the kernel will only copy in as much as it has space for
(or none at all if the length is zero) -- see the kernel
move_addr_to_user() function. Mimic this in QEMU's
host_to_target_sockaddr() routine.

In particular, this fixes a segfault running the LTP
recvfrom01 test, where the guest makes a recvfrom()
call with a bad buffer pointer and other parameters which
cause the kernel to set the addrlen to zero; because we
did not skip the attempt to swap the sa_family field we
segfaulted on the bad address.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:17 +03:00
Peter Maydell
31efaef1d9 linux-user: Forget about synchronous signal once it is delivered
Commit 655ed67c2a which switched synchronous signals to
benig recorded in ts->sync_signal rather than in a queue
with every other signal had a bug: we failed to clear
the flag indicating that a synchronous signal was pending
when we delivered it. This meant that we would take the signal
again and again every time the guest made a syscall.
(This is a bug introduced in my refactoring of Timothy Baldwin's
original code.)

Fix this by passing in the struct emulated_sigtable* to
handle_pending_signal(), so that we clear the pending flag
in the ts->sync_signal struct when handling a synchronous signal.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:16 +03:00
Peter Maydell
f2c2fb50be linux-user: Correct type for LOOP_GET_STATUS{,64} ioctls
The LOOP_GET_STATUS and LOOP_GET_STATUS64 ioctls were incorrectly
defined as IOC_W rather than IOC_R, which meant we weren't
correctly copying the information back from the kernel to the guest.
The loop_info64 structure definition was also missing a member
and using the wrong type for several 32-bit fields.

In particular, this meant that "kpartx -d image.img" didn't work
and "losetup -a" behaved strangely. Correct the ioctl type definitions.

Reported-by: Chanho Park <chanho61.park@samsung.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:16 +03:00
Peter Maydell
a4a2c51f90 linux-user: Correct type for BLKSSZGET
The BLKSSZGET ioctl takes an argument which is a pointer to an int.
We were incorrectly declaring it to take a pointer to a long, which
meant that we would incorrectly write to memory which we should not
if the guest is a 64-bit architecture.

In particular, kpartx uses this ioctl to write to an int on the
stack, which tends to result in it crashing immediately.

Reported-by: Chanho Park <chanho61.park@samsung.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:16 +03:00
Peter Maydell
884cdc48a9 linux-user: Add loop control ioctls
Add support for the /dev/loop-control ioctls:
 LOOP_CTL_ADD
 LOOP_CTL_REMOVE
 LOOP_CTL_GET_FREE

[RV: fixed to apply to new header guards]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:22:33 +03:00
Peter Maydell
c815701e81 linux-user: Check sigsetsize argument to syscalls
Many syscalls which take a sigset_t argument also take an argument
giving the size of the sigset_t.  The kernel insists that this
matches its idea of the type size and fails EINVAL if it is not.
Implement this logic in QEMU.  (This mostly just means some LTP test
cases which check error cases now pass.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
2016-07-19 15:20:59 +03:00
Laurent Vivier
c5dff280b8 linux-user: add nested netlink types
Nested types are used by the kernel to send link information and
protocol properties.

We can see following errors with "ip link show":

Unimplemented nested type 26
Unimplemented nested type 26
Unimplemented nested type 18
Unimplemented nested type 26
Unimplemented nested type 18
Unimplemented nested type 26

This patch implements nested types 18 (IFLA_LINKINFO) and
26 (IFLA_AF_SPEC).

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
a82ea9393d linux-user: convert sockaddr_ll from host to target
As we convert sockaddr for AF_PACKET family for sendto() (target to
host) we need also to convert this for getsockname() (host to target).

arping uses getsockname() to get the the interface address and uses
this address with sendto().

Tested with:

    /sbin/arping -D -q -c2 -I eno1 192.168.122.88

...
getsockname(3, {sa_family=AF_PACKET, proto=0x806, if2,
pkttype=PACKET_HOST, addr(6)={1, 10c37b6b9a76}, [18]) = 0
...
sendto(3, "..." 28, 0,
       {sa_family=AF_PACKET, proto=0x806, if2, pkttype=PACKET_HOST,
       addr(6)={1, ffffffffffff}, 20) = 28
...

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
c35e1f9c87 linux-user: add fd_trans helper in do_recvfrom()
Fix passwd using netlink audit.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
7d61d89232 linux-user: fix netlink memory corruption
Netlink is byte-swapping data in the guest memory (it's bad).

It's ok when the data come from the host as they are generated by the
host.

But it doesn't work when data come from the guest: the guest can
try to reuse these data whereas they have been byte-swapped.

This is what happens in glibc:

glibc generates a sequence number in nlh.nlmsg_seq and calls
sendto() with this nlh. In sendto(), we byte-swap nlmsg.seq.

Later, after the recvmsg(), glibc compares nlh.nlmsg_seq with
sequence number given in return, and of course it fails (hangs),
because nlh.nlmsg_seq is not valid anymore.

The involved code in glibc is:

sysdeps/unix/sysv/linux/check_pf.c:make_request()
...
  req.nlh.nlmsg_seq = time (NULL);
...
  if (TEMP_FAILURE_RETRY (__sendto (fd, (void *) &req, sizeof (req), 0,
                                    (struct sockaddr *) &nladdr,
                                    sizeof (nladdr))) < 0)
<here req.nlh.nlmsg_seq has been byte-swapped>
...
  do
    {
...
      ssize_t read_len = TEMP_FAILURE_RETRY (__recvmsg (fd, &msg, 0));
...
      struct nlmsghdr *nlmh;
      for (nlmh = (struct nlmsghdr *) buf;
           NLMSG_OK (nlmh, (size_t) read_len);
           nlmh = (struct nlmsghdr *) NLMSG_NEXT (nlmh, read_len))
        {
<we compare nlmh->nlmsg_seq with corrupted req.nlh.nlmsg_seq>
          if (nladdr.nl_pid != 0 || (pid_t) nlmh->nlmsg_pid != pid
              || nlmh->nlmsg_seq != req.nlh.nlmsg_seq)
            continue;
...
          else if (nlmh->nlmsg_type == NLMSG_DONE)
            /* We found the end, leave the loop.  */
            done = true;
        }
    }
  while (! done);

As we have a continue on "nlmh->nlmsg_seq != req.nlh.nlmsg_seq",
"done" cannot be set to "true" and we have an infinite loop.

It's why commands like "apt-get update" or "dnf update hangs".

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
ef759f6fcc linux-user: fd_trans_*_data() returns the length
fd_trans_target_to_host_data() and fd_trans_host_to_target_data() must
return the length of processed data.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:58 +03:00
Eric Blake
faecd40a59 block: Simplify drive-mirror
Now that we can support boxed commands, use it to greatly
reduce the number of parameters (and likelihood of getting
out of sync) when adjusting drive-mirror parameters.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-Id: <1468535878-3760-1-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:09 +02:00
Eric Blake
4dc9397b62 block: Simplify block_set_io_throttle
Now that we can support boxed commands, use it to greatly
reduce the number of parameters (and likelihood of getting
out of sync) when adjusting throttle parameters.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-Id: <1468468228-27827-11-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
c818408e44 qapi: Implement boxed types for commands/events
Turn on the ability to pass command and event arguments in
a single boxed parameter, which must name a non-empty type
(although the type can be a struct with all optional members).
For structs, it makes it possible to pass a single qapi type
instead of a breakout of all struct members (useful if the
arguments are already in a struct or if the number of members
is large); for other complex types, it is now possible to use
a union or alternate as the data for a command or event.

The empty type may be technically feasible if needed down the
road, but it's easier to forbid it now and relax things to allow
it later, than it is to allow it now and have to special case
how the generated 'q_empty' type is handled (see commit 7ce106a9
for reasons why nothing is generated for the empty type).  An
alternate type is never considered empty, but now that a boxed
type can be either an object or an alternate, we have to provide
a trivial QAPISchemaAlternateType.is_empty().  The new call to
arg_type.is_empty() during QAPISchemaCommand.check() requires
that we first check the type in question; but there is no chance
of introducing a cycle since objects do not refer back to commands.

We still have a split in syntax checking between ad-hoc parsing
up front (merely validates that 'boxed' has a sane value) and
during .check() methods (if 'boxed' is set, then 'data' must name
a non-empty user-defined type).

Generated code is unchanged, as long as no client uses the
new feature.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-10-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Test files renamed to *-boxed-*]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
48825ca419 qapi: Plumb in 'boxed' to qapi generator lower levels
The next patch will add support for passing a qapi union type
as the 'data' of a command.  But to do that, the user function
for implementing the command, as called by the generated
marshal command, must take the corresponding C struct as a
single boxed pointer, rather than a breakdown into one
parameter per member.  Even without a union, being able to use
a C struct rather than a list of parameters can make it much
easier to handle coding with QAPI.

This patch adds the internal plumbing of a 'boxed' flag
associated with each command and event.  In several cases,
this means adding indentation, with one new dead branch and
the remaining branch being the original code more deeply
nested; this was done so that the new implementation in the
next patch is easier to review without also being mixed with
indentation changes.

For this patch, no behavior or generated output changes, other
than the testsuite outputting the value of the new flag
(always False for now).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-9-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Identifier box renamed to boxed in two places]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
4d0b268fdb qapi-event: Simplify visit of non-implicit data
Commit 7ce106a9 documented why we don't generated a visit_type_FOO()
for implicit types; and therefore events with an anonymous type for
'data' have to open-code a visit.  Note that the open-coded visit in
qapi-event.c is slightly different from what is done in
qapi-visit.c for normal types, in part because we don't have to
check for *obj being NULL or free things on error.  But where the
type is not implicit, it is nicer to reuse the normal visit instead
of open-coding a duplicate.

At the moment, the only event with a non-implicit 'data' is in the
testsuite, where test-qapi-event.c changes as follows:

|@@ -155,6 +155,7 @@ void qapi_event_send___org_qemu_x_event(
|     __org_qemu_x_Struct param = {
|         __org_qemu_x_member1, (char *)__org_qemu_x_member2, has_q_wchar_t, q_wchar_t
|     };
|+    __org_qemu_x_Struct *arg = &param;
|
|     emit = qmp_event_get_func_emit();
|     if (!emit) {
|@@ -164,16 +165,7 @@ void qapi_event_send___org_qemu_x_event(
|     qmp = qmp_event_build_dict("__ORG.QEMU_X-EVENT");
|
|     v = qmp_output_visitor_new(&obj);
|-
|-    visit_start_struct(v, "__ORG.QEMU_X-EVENT", NULL, 0, &err);
|-    if (err) {
|-        goto out;
|-    }
|-    visit_type___org_qemu_x_Struct_members(v, &param, &err);
|-    if (!err) {
|-    if (!err) {
|-        visit_check_struct(v, &err);
|-    }
|-    visit_end_struct(v, NULL);
|+    visit_type___org_qemu_x_Struct(v, "__ORG.QEMU_X-EVENT", &arg, &err);
|     if (err) {
|         goto out;
|     }

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-8-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
fa274ed6fb qapi: Drop useless gen_err_check()
Ever since commit 12f254f removed the last parameterization
of gen_err_check(), it no longer makes sense to hide the three
lines of generated C code behind a macro call. Just inline it
into the remaining users.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-7-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
b616770682 qapi: Add type.is_empty() helper
In the near future, we want to lift our artificial restriction of
no variants at the top level of an event, at which point the
currently open-coded check for empty members will become
insufficient.  Factor it out into a new helper method is_empty()
now, and future-proof it by checking variants, too, along with an
assert that it is not used prior to the completion of .check().
Update places that were checking for (non-)empty .members to use
the new helper.

All of the current callers assert that there are no variants (either
directly, or by qapi.py asserting that base types have no variants),
so this is not a semantic change.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-6-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
da9cb19385 qapi: Hide tag_name data member of variants
Clean up the only remaining external use of the tag_name field of
QAPISchemaObjectTypeVariants, by explicitly listing the generated
'type' tag for all variants in the testsuite (you can still tell
simple unions by the -wrapper types).  Then we can mark the
tag_name field as private by adding a leading underscore to prevent
any further use.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-5-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
cd50a25645 qapi: Special case c_name() for empty type
Commit 7ce106a rendered QAPISchemaObjectType.c_name() redundant,
since it now does nothing more than delegate to its superclass.
However, rather than deleting it, we can restore part of the
assertion that was removed in that commit, to prove that we never
emit the empty type directly in generated code, but rather
special-case it as a built-in that makes other aspects of code
generation easier to reason about.

Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-4-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
d0b182392d qapi: Require all branches of flat union enum to be covered
We were previously enforcing that all flat union branches were
found in the corresponding enum, but not that all enum values
were covered by branches.  The resulting generated code would
abort() if the user passes the uncovered enum value.

We don't automatically treat non-present branches in a flat
union as empty types, for symmetry with simple unions (there,
the enum type is generated from the list of all branches, so
there is no way to omit a branch but still have it be part of
the union).

A later patch will add shorthand so that branches that are empty
in flat unions can be declared as 'branch':{} instead of
'branch':'Empty', to avoid the need for an otherwise useless
explicit empty type.  [Such shorthand for simple unions is a bit
harder to justify, since we would still have to generate a
wrapper type that parses 'data':{}, rather than truly being an
empty branch with no additional siblings to the 'type' member.]

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-3-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Kővágó, Zoltán
cebea51057 net: use Netdev instead of NetClientOptions in client init
This way we no longer need NetClientOptions and can convert Netdev
into a flat union.

Signed-off-by: Kővágó, Zoltán <DirtY.iCE.hu@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <93ffdfed7054529635e6acb935150d95dc173a12.1441627176.git.DirtY.iCE.hu@gmail.com>

[rework net_client_init1() to pass Netdev by copying from NetdevLegacy,
rather than merging the two types - which means that we still need
NetClientOptions after all.  Rebase to qapi changes. The bulk of the
patch is mechanical, replacing 'opts' by 'netdev->opts', while
net_client_init1() takes care of converting between legacy and modern
types.]

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-2-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Paolo Bonzini
3d344c2aab qapi: change QmpInputVisitor to QSLIST
This saves a lot of memory compared to a statically-sized array,
or at least 24kb could be considered a lot on an Atari ST.
It also makes the code more similar to QmpOutputVisitor.

This removes the limit on the depth of a QObject that can be processed
into a QAPI tree.  This is not a problem because QObjects can be
considered trusted; the text received on the QMP wire is untrusted
input, but the JSON parser already takes pains to limit the QObject tree
it creates.  We don't need the QMP input visitor to limit it again.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1467906798-5312-3-git-send-email-pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[Commit message typo fixed]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Paolo Bonzini
fc76ae8b38 qapi: change QmpOutputVisitor to QSLIST
This saves a little memory compared to the doubly-linked QTAILQ.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1467906798-5312-2-git-send-email-pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[Comment tweaked to avoid long line]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
594 changed files with 23476 additions and 9990 deletions

View File

@@ -31,7 +31,11 @@ Do not leave whitespace dangling off the ends of lines.
2. Line width
Lines are 80 characters; not longer.
Lines should be 80 characters; try not to make them longer.
Sometimes it is hard to do, especially when dealing with QEMU subsystems
that use long function or symbol names. Even in that case, do not make
lines much longer than 80 characters.
Rationale:
- Some people like to tile their 24" screens with a 6x4 matrix of 80x24
@@ -39,6 +43,8 @@ Rationale:
let them keep doing it.
- Code and especially patches is much more readable if limited to a sane
line length. Eighty is traditional.
- The four-space indentation makes the most common excuse ("But look
at all that white space on the left!") moot.
- It is the QEMU coding style.
3. Naming

View File

@@ -158,6 +158,10 @@ painful. These are:
* you may assume that right shift of a signed integer duplicates
the sign bit (ie it is an arithmetic shift, not a logical shift)
In addition, QEMU assumes that the compiler does not use the latitude
given in C99 and C11 to treat aspects of signed '<<' as undefined, as
documented in the GNU Compiler Collection manual starting at version 4.0.
7. Error handling and reporting
7.1 Reporting errors to the human user

View File

@@ -449,23 +449,23 @@ S: Maintained
F: hw/*/versatile*
Xilinx Zynq
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
M: Alistair Francis <alistair.francis@xilinx.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/xilinx_zynq.c
F: hw/misc/zynq_slcr.c
F: hw/*/xilinx_*
F: hw/*/cadence_*
F: hw/ssi/xilinx_spips.c
F: hw/misc/zynq_slcr.c
F: include/hw/xilinx.h
X: hw/ssi/xilinx_*
Xilinx ZynqMP
M: Alistair Francis <alistair.francis@xilinx.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/xlnx-zynqmp.c
F: hw/arm/xlnx-ep108.c
F: include/hw/arm/xlnx-zynqmp.h
F: hw/*/xlnx*.c
F: include/hw/*/xlnx*.c
ARM ACPI Subsystem
M: Shannon Zhao <zhaoshenglong@huawei.com>
@@ -948,14 +948,6 @@ S: Supported
F: hw/scsi/megasas.c
F: hw/scsi/mfi.h
Xilinx EDK
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
M: Alistair Francis <alistair.francis@xilinx.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
S: Maintained
F: hw/*/xilinx_*
F: include/hw/xilinx.h
Network packet abstractions
M: Dmitry Fleytman <dmitry@daynix.com>
S: Maintained
@@ -1177,6 +1169,13 @@ F: numa.c
F: include/sysemu/numa.h
T: git git://github.com/ehabkost/qemu.git numa
Host Memory Backends
M: Eduardo Habkost <ehabkost@redhat.com>
M: Igor Mammedov <imammedo@redhat.com>
S: Maintained
F: backends/hostmem*.c
F: include/sysemu/hostmem.h
QAPI
M: Markus Armbruster <armbru@redhat.com>
M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1243,6 +1242,12 @@ F: docs/*qmp-*
F: scripts/qmp/
T: git git://repo.or.cz/qemu/armbru.git qapi-next
Register API
M: Alistair Francis <alistair.francis@xilinx.com>
S: Maintained
F: hw/core/register.c
F: include/hw/register.h
SLIRP
M: Samuel Thibault <samuel.thibault@ens-lyon.org>
M: Jan Kiszka <jan.kiszka@siemens.com>

View File

@@ -225,8 +225,9 @@ dtc/%:
$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
# Only keep -O and -g cflags
romsubdir-%:
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/",)
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/" CFLAGS="$(filter -O% -g%,$(CFLAGS))",)
ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))

View File

@@ -153,6 +153,7 @@ trace-events-y += hw/alpha/trace-events
trace-events-y += ui/trace-events
trace-events-y += audio/trace-events
trace-events-y += net/trace-events
trace-events-y += target-i386/trace-events
trace-events-y += target-sparc/trace-events
trace-events-y += target-s390x/trace-events
trace-events-y += target-ppc/trace-events

View File

@@ -212,7 +212,7 @@ hmp-commands-info.h: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxtool
qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@")
clean:
clean: clean-target
rm -f *.a *~ $(PROGS)
rm -f $(shell find . -name '*.[od]')
rm -f hmp-commands.h qmp-commands-old.h gdbstub-xml.c

View File

@@ -1,5 +1,5 @@
QEMU
---
QEMU README
===========
QEMU is a generic and open source machine & userspace emulator and
virtualizer.
@@ -31,31 +31,31 @@ version 2. For full licensing details, consult the LICENSE file.
Building
---
========
QEMU is multi-platform software intended to be buildable on all modern
Linux platforms, OS-X, Win32 (via the Mingw64 toolchain) and a variety
of other UNIX targets. The simple steps to build QEMU are:
mkdir build
cd build
../configure
make
mkdir build
cd build
../configure
make
Complete details of the process for building and configuring QEMU for
all supported host platforms can be found in the qemu-tech.html file.
Additional information can also be found online via the QEMU website:
http://qemu-project.org/Hosts/Linux
http://qemu-project.org/Hosts/W32
http://qemu-project.org/Hosts/Linux
http://qemu-project.org/Hosts/W32
Submitting patches
---
==================
The QEMU source code is maintained under the GIT version control system.
git clone git://git.qemu-project.org/qemu.git
git clone git://git.qemu-project.org/qemu.git
When submitting patches, the preferred approach is to use 'git
format-patch' and/or 'git send-email' to format & send the mail to the
@@ -66,18 +66,18 @@ guidelines set out in the HACKING and CODING_STYLE files.
Additional information on submitting patches can be found online via
the QEMU website
http://qemu-project.org/Contribute/SubmitAPatch
http://qemu-project.org/Contribute/TrivialPatches
http://qemu-project.org/Contribute/SubmitAPatch
http://qemu-project.org/Contribute/TrivialPatches
Bug reporting
---
=============
The QEMU project uses Launchpad as its primary upstream bug tracker. Bugs
found when running code built from QEMU git or upstream released sources
should be reported via:
https://bugs.launchpad.net/qemu/
https://bugs.launchpad.net/qemu/
If using QEMU via an operating system vendor pre-built binary package, it
is preferable to report bugs to the vendor's own bug tracker first. If
@@ -86,21 +86,22 @@ reported via launchpad.
For additional information on bug reporting consult:
http://qemu-project.org/Contribute/ReportABug
http://qemu-project.org/Contribute/ReportABug
Contact
---
=======
The QEMU community can be contacted in a number of ways, with the two
main methods being email and IRC
- Mailing List: qemu-devel@nongnu.org
- Archives: http://lists.nongnu.org/mailman/listinfo/qemu-devel
- IRC: #qemu on irc.oftc.net
- qemu-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/qemu-devel
- #qemu on irc.oftc.net
Information on additional methods of contacting the community can be
found online via the QEMU website:
http://qemu-project.org/Contribute/StartHere
-- End

View File

@@ -1 +1 @@
2.6.50
2.7.50

View File

@@ -1739,13 +1739,21 @@ static void audio_vm_change_state_handler (void *opaque, int running,
audio_reset_timer (s);
}
static void audio_atexit (void)
static bool is_cleaning_up;
bool audio_is_cleaning_up(void)
{
return is_cleaning_up;
}
void audio_cleanup(void)
{
AudioState *s = &glob_audio_state;
HWVoiceOut *hwo = NULL;
HWVoiceIn *hwi = NULL;
HWVoiceOut *hwo, *hwon;
HWVoiceIn *hwi, *hwin;
while ((hwo = audio_pcm_hw_find_any_out (hwo))) {
is_cleaning_up = true;
QLIST_FOREACH_SAFE(hwo, &glob_audio_state.hw_head_out, entries, hwon) {
SWVoiceCap *sc;
if (hwo->enabled) {
@@ -1761,17 +1769,20 @@ static void audio_atexit (void)
cb->ops.destroy (cb->opaque);
}
}
QLIST_REMOVE(hwo, entries);
}
while ((hwi = audio_pcm_hw_find_any_in (hwi))) {
QLIST_FOREACH_SAFE(hwi, &glob_audio_state.hw_head_in, entries, hwin) {
if (hwi->enabled) {
hwi->pcm_ops->ctl_in (hwi, VOICE_DISABLE);
}
hwi->pcm_ops->fini_in (hwi);
QLIST_REMOVE(hwi, entries);
}
if (s->drv) {
s->drv->fini (s->drv_opaque);
s->drv = NULL;
}
}
@@ -1799,7 +1810,7 @@ static void audio_init (void)
QLIST_INIT (&s->hw_head_out);
QLIST_INIT (&s->hw_head_in);
QLIST_INIT (&s->cap_head);
atexit (audio_atexit);
atexit(audio_cleanup);
s->ts = timer_new_ns(QEMU_CLOCK_VIRTUAL, audio_timer, s);
@@ -1966,8 +1977,7 @@ CaptureVoiceOut *AUD_add_capture (
QLIST_INSERT_HEAD (&s->cap_head, cap, entries);
QLIST_INSERT_HEAD (&cap->cb_head, cb, entries);
hw = NULL;
while ((hw = audio_pcm_hw_find_any_out (hw))) {
QLIST_FOREACH(hw, &glob_audio_state.hw_head_out, entries) {
audio_attach_capture (hw);
}
return cap;

View File

@@ -163,4 +163,7 @@ static inline void *advance (void *p, int incr)
int wav_start_capture (CaptureState *s, const char *path, int freq,
int bits, int nchannels);
bool audio_is_cleaning_up(void);
void audio_cleanup(void);
#endif /* QEMU_AUDIO_H */

View File

@@ -36,8 +36,6 @@
#define MAC_OS_X_VERSION_10_6 1060
#endif
static int isAtexit;
typedef struct {
int buffer_frames;
int nbuffers;
@@ -378,11 +376,6 @@ static inline UInt32 isPlaying (AudioDeviceID outputDeviceID)
return result;
}
static void coreaudio_atexit (void)
{
isAtexit = 1;
}
static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name)
{
int err;
@@ -630,7 +623,7 @@ static void coreaudio_fini_out (HWVoiceOut *hw)
int err;
coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw;
if (!isAtexit) {
if (!audio_is_cleaning_up()) {
/* stop playback */
if (isPlaying(core->outputDeviceID)) {
status = AudioDeviceStop(core->outputDeviceID, core->ioprocid);
@@ -673,7 +666,7 @@ static int coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)
case VOICE_DISABLE:
/* stop playback */
if (!isAtexit) {
if (!audio_is_cleaning_up()) {
if (isPlaying(core->outputDeviceID)) {
status = AudioDeviceStop(core->outputDeviceID,
core->ioprocid);
@@ -697,7 +690,6 @@ static void *coreaudio_audio_init (void)
CoreaudioConf *conf = g_malloc(sizeof(CoreaudioConf));
*conf = glob_conf;
atexit(coreaudio_atexit);
return conf;
}

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# audio/alsaaudio.c
alsa_revents(int revents) "revents = %d"

View File

@@ -203,6 +203,7 @@ static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
static void host_memory_backend_set_prealloc(Object *obj, bool value,
Error **errp)
{
Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
if (backend->force_prealloc) {
@@ -223,7 +224,11 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
os_mem_prealloc(fd, ptr, sz);
os_mem_prealloc(fd, ptr, sz, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
backend->prealloc = true;
}
}
@@ -286,8 +291,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
if (bc->alloc) {
bc->alloc(backend, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
goto out;
}
ptr = memory_region_get_ram_ptr(&backend->mr);
@@ -343,9 +347,15 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
* specified NUMA policy in place.
*/
if (backend->prealloc) {
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
&local_err);
if (local_err) {
goto out;
}
}
}
out:
error_propagate(errp, local_err);
}
static bool

View File

@@ -25,6 +25,7 @@
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/nbd.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qapi/qmp/qerror.h"
@@ -2206,6 +2207,7 @@ static void bdrv_close(BlockDriverState *bs)
void bdrv_close_all(void)
{
block_job_cancel_sync_all();
nbd_export_close_all();
/* Drop references from requests still in flight, such as canceled block
* jobs whose AIO context has not been polled yet */
@@ -2837,7 +2839,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
BlockDriverInfo bdi;
if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
if (!(bs->open_flags & BDRV_O_UNMAP)) {
return false;
}

View File

@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
uint64_t sectors_read;
unsigned long *done_bitmap;
int64_t cluster_size;
bool compress;
NotifierWithReturn before_write;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
@@ -154,7 +155,8 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
} else {
ret = blk_co_pwritev(job->target, start * job->cluster_size,
bounce_qiov.size, &bounce_qiov, 0);
bounce_qiov.size, &bounce_qiov,
job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
@@ -477,6 +479,7 @@ static void coroutine_fn backup_run(void *opaque)
void backup_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
bool compress,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
@@ -507,6 +510,12 @@ void backup_start(const char *job_id, BlockDriverState *bs,
return;
}
if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
error_setg(errp, "Compression is not supported for this drive %s",
bdrv_get_device_name(target));
return;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
return;
}
@@ -555,6 +564,7 @@ void backup_start(const char *job_id, BlockDriverState *bs,
job->sync_mode = sync_mode;
job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
job->compress = compress;
/* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for

View File

@@ -39,6 +39,9 @@ typedef struct BDRVBlkdebugState {
int new_state;
int align;
/* For blkdebug_refresh_filename() */
char *config_file;
QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
@@ -351,7 +354,6 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
BDRVBlkdebugState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *config;
uint64_t align;
int ret;
@@ -364,8 +366,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Read rules from config file or command line options */
config = qemu_opt_get(opts, "config");
ret = read_config(s, config, options, errp);
s->config_file = g_strdup(qemu_opt_get(opts, "config"));
ret = read_config(s, s->config_file, options, errp);
if (ret) {
goto out;
}
@@ -398,6 +400,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
fail_unref:
bdrv_unref_child(bs, bs->file);
out:
if (ret < 0) {
g_free(s->config_file);
}
qemu_opts_del(opts);
return ret;
}
@@ -515,6 +520,8 @@ static void blkdebug_close(BlockDriverState *bs)
remove_rule(rule);
}
}
g_free(s->config_file);
}
static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
@@ -679,6 +686,7 @@ static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkdebugState *s = bs->opaque;
QDict *opts;
const QDictEntry *e;
bool force_json = false;
@@ -700,8 +708,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
if (!force_json && bs->file->bs->exact_filename[0]) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkdebug:%s:%s",
qdict_get_try_str(options, "config") ?: "",
"blkdebug:%s:%s", s->config_file ?: "",
bs->file->bs->exact_filename);
}

View File

@@ -114,11 +114,11 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
return ret;
}
static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
uint64_t reqid = request_id++;
int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -148,7 +148,7 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_co_pwritev = blkreplay_co_pwritev,
.bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes,
.bdrv_co_discard = blkreplay_co_discard,
.bdrv_co_pdiscard = blkreplay_co_pdiscard,
.bdrv_co_flush = blkreplay_co_flush,
};

View File

@@ -409,6 +409,22 @@ bool bdrv_has_blk(BlockDriverState *bs)
return bdrv_first_blk(bs) != NULL;
}
/*
* Returns true if @bs has only BlockBackends as parents.
*/
bool bdrv_is_root_node(BlockDriverState *bs)
{
BdrvChild *c;
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->role != &child_root) {
return false;
}
}
return true;
}
/*
* Return @blk's DriveInfo if any, else null.
*/
@@ -727,21 +743,6 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return 0;
}
static int blk_check_request(BlockBackend *blk, int64_t sector_num,
int nb_sectors)
{
if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
return -EIO;
}
if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
return -EIO;
}
return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE);
}
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
@@ -1065,16 +1066,16 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk,
return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}
BlockAIOCB *blk_aio_discard(BlockBackend *blk,
int64_t sector_num, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return blk_abort_aio_request(blk, cb, opaque, ret);
}
return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
@@ -1106,14 +1107,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}
int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
return bdrv_co_pdiscard(blk_bs(blk), offset, count);
}
int blk_co_flush(BlockBackend *blk)
@@ -1484,15 +1485,11 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
flags | BDRV_REQ_ZERO_WRITE);
}
int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int count)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
return ret;
}
return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
BDRV_REQ_WRITE_COMPRESSED);
}
int blk_truncate(BlockBackend *blk, int64_t offset)
@@ -1504,14 +1501,14 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
return bdrv_truncate(blk_bs(blk), offset);
}
int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
return bdrv_pdiscard(blk_bs(blk), offset, count);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,

View File

@@ -563,6 +563,53 @@ static int block_crypto_create_luks(const char *filename,
filename, opts, errp);
}
static int block_crypto_get_info_luks(BlockDriverState *bs,
BlockDriverInfo *bdi)
{
BlockDriverInfo subbdi;
int ret;
ret = bdrv_get_info(bs->file->bs, &subbdi);
if (ret != 0) {
return ret;
}
bdi->unallocated_blocks_are_zero = false;
bdi->can_write_zeroes_with_unmap = false;
bdi->cluster_size = subbdi.cluster_size;
return 0;
}
static ImageInfoSpecific *
block_crypto_get_specific_info_luks(BlockDriverState *bs)
{
BlockCrypto *crypto = bs->opaque;
ImageInfoSpecific *spec_info;
QCryptoBlockInfo *info;
info = qcrypto_block_get_info(crypto->block, NULL);
if (!info) {
return NULL;
}
if (info->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
qapi_free_QCryptoBlockInfo(info);
return NULL;
}
spec_info = g_new(ImageInfoSpecific, 1);
spec_info->type = IMAGE_INFO_SPECIFIC_KIND_LUKS;
spec_info->u.luks.data = g_new(QCryptoBlockInfoLUKS, 1);
*spec_info->u.luks.data = info->u.luks;
/* Blank out pointers we've just stolen to avoid double free */
memset(&info->u.luks, 0, sizeof(info->u.luks));
qapi_free_QCryptoBlockInfo(info);
return spec_info;
}
BlockDriver bdrv_crypto_luks = {
.format_name = "luks",
.instance_size = sizeof(BlockCrypto),
@@ -576,6 +623,8 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_co_readv = block_crypto_co_readv,
.bdrv_co_writev = block_crypto_co_writev,
.bdrv_getlength = block_crypto_getlength,
.bdrv_get_info = block_crypto_get_info_luks,
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
};
static void block_crypto_init(void)

View File

@@ -169,7 +169,7 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
state->sock_fd = fd;
s = state->s;
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
switch (action) {
case CURL_POLL_IN:
aio_set_fd_handler(s->aio_context, fd, false,

View File

@@ -326,14 +326,14 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
}
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
@@ -361,7 +361,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
int64_t nr_sectors)
{
BdrvDirtyBitmap *bitmap;
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {

View File

@@ -11,7 +11,27 @@
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#define GLUSTER_OPT_FILENAME "filename"
#define GLUSTER_OPT_VOLUME "volume"
#define GLUSTER_OPT_PATH "path"
#define GLUSTER_OPT_TYPE "type"
#define GLUSTER_OPT_SERVER_PATTERN "server."
#define GLUSTER_OPT_HOST "host"
#define GLUSTER_OPT_PORT "port"
#define GLUSTER_OPT_TO "to"
#define GLUSTER_OPT_IPV4 "ipv4"
#define GLUSTER_OPT_IPV6 "ipv6"
#define GLUSTER_OPT_SOCKET "socket"
#define GLUSTER_OPT_DEBUG "debug"
#define GLUSTER_DEFAULT_PORT 24007
#define GLUSTER_DEBUG_DEFAULT 4
#define GLUSTER_DEBUG_MAX 9
#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"
typedef struct GlusterAIOCB {
int64_t size;
@@ -28,27 +48,141 @@ typedef struct BDRVGlusterState {
int debug_level;
} BDRVGlusterState;
typedef struct GlusterConf {
char *server;
int port;
char *volname;
char *image;
char *transport;
int debug_level;
} GlusterConf;
typedef struct BDRVGlusterReopenState {
struct glfs *glfs;
struct glfs_fd *fd;
} BDRVGlusterReopenState;
static void qemu_gluster_gconf_free(GlusterConf *gconf)
{
if (gconf) {
g_free(gconf->server);
g_free(gconf->volname);
g_free(gconf->image);
g_free(gconf->transport);
g_free(gconf);
static QemuOptsList qemu_gluster_create_opts = {
.name = "qemu-gluster-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
.desc = {
{
.name = BLOCK_OPT_SIZE,
.type = QEMU_OPT_SIZE,
.help = "Virtual disk size"
},
{
.name = BLOCK_OPT_PREALLOC,
.type = QEMU_OPT_STRING,
.help = "Preallocation mode (allowed values: off, full)"
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
}
}
};
static int parse_volume_options(GlusterConf *gconf, char *path)
static QemuOptsList runtime_opts = {
.name = "gluster",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = GLUSTER_OPT_FILENAME,
.type = QEMU_OPT_STRING,
.help = "URL to the gluster image",
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_json_opts = {
.name = "gluster_json",
.head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
.desc = {
{
.name = GLUSTER_OPT_VOLUME,
.type = QEMU_OPT_STRING,
.help = "name of gluster volume where VM image resides",
},
{
.name = GLUSTER_OPT_PATH,
.type = QEMU_OPT_STRING,
.help = "absolute path to image file in gluster volume",
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_type_opts = {
.name = "gluster_type",
.head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
.desc = {
{
.name = GLUSTER_OPT_TYPE,
.type = QEMU_OPT_STRING,
.help = "tcp|unix",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_unix_opts = {
.name = "gluster_unix",
.head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
.desc = {
{
.name = GLUSTER_OPT_SOCKET,
.type = QEMU_OPT_STRING,
.help = "socket file path)",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_tcp_opts = {
.name = "gluster_tcp",
.head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
.desc = {
{
.name = GLUSTER_OPT_TYPE,
.type = QEMU_OPT_STRING,
.help = "tcp|unix",
},
{
.name = GLUSTER_OPT_HOST,
.type = QEMU_OPT_STRING,
.help = "host address (hostname/ipv4/ipv6 addresses)",
},
{
.name = GLUSTER_OPT_PORT,
.type = QEMU_OPT_NUMBER,
.help = "port number on which glusterd is listening (default 24007)",
},
{
.name = "to",
.type = QEMU_OPT_NUMBER,
.help = "max port number, not supported by gluster",
},
{
.name = "ipv4",
.type = QEMU_OPT_BOOL,
.help = "ipv4 bool value, not supported by gluster",
},
{
.name = "ipv6",
.type = QEMU_OPT_BOOL,
.help = "ipv6 bool value, not supported by gluster",
},
{ /* end of list */ }
},
};
static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
{
char *p, *q;
@@ -62,31 +196,29 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
if (*p == '\0') {
return -EINVAL;
}
gconf->volname = g_strndup(q, p - q);
gconf->volume = g_strndup(q, p - q);
/* image */
/* path */
p += strspn(p, "/");
if (*p == '\0') {
return -EINVAL;
}
gconf->image = g_strdup(p);
gconf->path = g_strdup(p);
return 0;
}
/*
* file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
* file=gluster[+transport]://[host[:port]]/volume/path[?socket=...]
*
* 'gluster' is the protocol.
*
* 'transport' specifies the transport type used to connect to gluster
* management daemon (glusterd). Valid transport types are
* tcp, unix and rdma. If a transport type isn't specified, then tcp
* type is assumed.
* tcp or unix. If a transport type isn't specified, then tcp type is assumed.
*
* 'server' specifies the server where the volume file specification for
* the given volume resides. This can be either hostname, ipv4 address
* or ipv6 address. ipv6 address needs to be within square brackets [ ].
* If transport type is 'unix', then 'server' field should not be specified.
* 'host' specifies the host where the volume file specification for
* the given volume resides. This can be either hostname or ipv4 address.
* If transport type is 'unix', then 'host' field should not be specified.
* The 'socket' field needs to be populated with the path to unix domain
* socket.
*
@@ -95,23 +227,22 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
* default port. If the transport type is unix, then 'port' should not be
* specified.
*
* 'volname' is the name of the gluster volume which contains the VM image.
* 'volume' is the name of the gluster volume which contains the VM image.
*
* 'image' is the path to the actual VM image that resides on gluster volume.
* 'path' is the path to the actual VM image that resides on gluster volume.
*
* Examples:
*
* file=gluster://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
* file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
* file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
* file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
* file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
* file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
* file=gluster+rdma://1.2.3.4:24007/testvol/a.img
*/
static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
const char *filename)
{
GlusterServer *gsconf;
URI *uri;
QueryParams *qp = NULL;
bool is_unix = false;
@@ -122,16 +253,21 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
return -EINVAL;
}
gconf->server = g_new0(GlusterServerList, 1);
gconf->server->value = gsconf = g_new0(GlusterServer, 1);
/* transport */
if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
gconf->transport = g_strdup("tcp");
gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+tcp")) {
gconf->transport = g_strdup("tcp");
gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+unix")) {
gconf->transport = g_strdup("unix");
gsconf->type = GLUSTER_TRANSPORT_UNIX;
is_unix = true;
} else if (!strcmp(uri->scheme, "gluster+rdma")) {
gconf->transport = g_strdup("rdma");
gsconf->type = GLUSTER_TRANSPORT_TCP;
error_report("Warning: rdma feature is not supported, falling "
"back to tcp");
} else {
ret = -EINVAL;
goto out;
@@ -157,10 +293,14 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
ret = -EINVAL;
goto out;
}
gconf->server = g_strdup(qp->p[0].value);
gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
} else {
gconf->server = g_strdup(uri->server ? uri->server : "localhost");
gconf->port = uri->port;
gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
if (uri->port) {
gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
} else {
gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
}
}
out:
@@ -171,30 +311,34 @@ out:
return ret;
}
static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
Error **errp)
static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
Error **errp)
{
struct glfs *glfs = NULL;
struct glfs *glfs;
int ret;
int old_errno;
GlusterServerList *server;
ret = qemu_gluster_parseuri(gconf, filename);
if (ret < 0) {
error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
"volname/image[?socket=...]");
errno = -ret;
goto out;
}
glfs = glfs_new(gconf->volname);
glfs = glfs_new(gconf->volume);
if (!glfs) {
goto out;
}
ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
gconf->port);
if (ret < 0) {
goto out;
for (server = gconf->server; server; server = server->next) {
if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
ret = glfs_set_volfile_server(glfs,
GlusterTransport_lookup[server->value->type],
server->value->u.q_unix.path, 0);
} else {
ret = glfs_set_volfile_server(glfs,
GlusterTransport_lookup[server->value->type],
server->value->u.tcp.host,
atoi(server->value->u.tcp.port));
}
if (ret < 0) {
goto out;
}
}
ret = glfs_set_logging(glfs, "-", gconf->debug_level);
@@ -204,15 +348,25 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
ret = glfs_init(glfs);
if (ret) {
error_setg_errno(errp, errno,
"Gluster connection failed for server=%s port=%d "
"volume=%s image=%s transport=%s", gconf->server,
gconf->port, gconf->volname, gconf->image,
gconf->transport);
error_setg(errp, "Gluster connection for volume %s, path %s failed"
" to connect", gconf->volume, gconf->path);
for (server = gconf->server; server; server = server->next) {
if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
error_append_hint(errp, "hint: failed on socket %s ",
server->value->u.q_unix.path);
} else {
error_append_hint(errp, "hint: failed on host %s and port %s ",
server->value->u.tcp.host,
server->value->u.tcp.port);
}
}
error_append_hint(errp, "Please refer to gluster logs for more info\n");
/* glfs_init sometimes doesn't set errno although docs suggest that */
if (errno == 0)
if (errno == 0) {
errno = EINVAL;
}
goto out;
}
@@ -227,6 +381,226 @@ out:
return NULL;
}
static int qapi_enum_parse(const char *opt)
{
int i;
if (!opt) {
return GLUSTER_TRANSPORT__MAX;
}
for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
if (!strcmp(opt, GlusterTransport_lookup[i])) {
return i;
}
}
return i;
}
/*
* Convert the json formatted command line into qapi.
*/
static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
QDict *options, Error **errp)
{
QemuOpts *opts;
GlusterServer *gsconf;
GlusterServerList *curr = NULL;
QDict *backing_options = NULL;
Error *local_err = NULL;
char *str = NULL;
const char *ptr;
size_t num_servers;
int i;
/* create opts info from runtime_json_opts list */
opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
goto out;
}
num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
if (num_servers < 1) {
error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
goto out;
}
gconf->volume = g_strdup(ptr);
ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
goto out;
}
gconf->path = g_strdup(ptr);
qemu_opts_del(opts);
for (i = 0; i < num_servers; i++) {
str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
qdict_extract_subqdict(options, &backing_options, str);
/* create opts info from runtime_type_opts list */
opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, backing_options, &local_err);
if (local_err) {
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
gsconf = g_new0(GlusterServer, 1);
gsconf->type = qapi_enum_parse(ptr);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
GLUSTER_OPT_TYPE, "tcp or unix");
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
qemu_opts_del(opts);
if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
/* create opts info from runtime_tcp_opts list */
opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, backing_options, &local_err);
if (local_err) {
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
GLUSTER_OPT_HOST);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.tcp.host = g_strdup(ptr);
ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
GLUSTER_OPT_PORT);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.tcp.port = g_strdup(ptr);
/* defend for unsupported fields in InetSocketAddress,
* i.e. @ipv4, @ipv6 and @to
*/
ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
if (ptr) {
gsconf->u.tcp.has_to = true;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
if (ptr) {
gsconf->u.tcp.has_ipv4 = true;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
if (ptr) {
gsconf->u.tcp.has_ipv6 = true;
}
if (gsconf->u.tcp.has_to) {
error_setg(&local_err, "Parameter 'to' not supported");
goto out;
}
if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
goto out;
}
qemu_opts_del(opts);
} else {
/* create opts info from runtime_unix_opts list */
opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, backing_options, &local_err);
if (local_err) {
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
GLUSTER_OPT_SOCKET);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.q_unix.path = g_strdup(ptr);
qemu_opts_del(opts);
}
if (gconf->server == NULL) {
gconf->server = g_new0(GlusterServerList, 1);
gconf->server->value = gsconf;
curr = gconf->server;
} else {
curr->next = g_new0(GlusterServerList, 1);
curr->next->value = gsconf;
curr = curr->next;
}
qdict_del(backing_options, str);
g_free(str);
str = NULL;
}
return 0;
out:
error_propagate(errp, local_err);
qemu_opts_del(opts);
if (str) {
qdict_del(backing_options, str);
g_free(str);
}
errno = EINVAL;
return -errno;
}
static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
const char *filename,
QDict *options, Error **errp)
{
int ret;
if (filename) {
ret = qemu_gluster_parse_uri(gconf, filename);
if (ret < 0) {
error_setg(errp, "invalid URI");
error_append_hint(errp, "Usage: file=gluster[+transport]://"
"[host[:port]]/volume/path[?socket=...]\n");
errno = -ret;
return NULL;
}
} else {
ret = qemu_gluster_parse_json(gconf, options, errp);
if (ret < 0) {
error_append_hint(errp, "Usage: "
"-drive driver=qcow2,file.driver=gluster,"
"file.volume=testvol,file.path=/path/a.qcow2"
"[,file.debug=9],file.server.0.type=tcp,"
"file.server.0.host=1.2.3.4,"
"file.server.0.port=24007,"
"file.server.1.transport=unix,"
"file.server.1.socket=/var/run/glusterd.socket ..."
"\n");
errno = -ret;
return NULL;
}
}
return qemu_gluster_glfs_init(gconf, errp);
}
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
@@ -255,30 +629,6 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
qemu_bh_schedule(acb->bh);
}
#define GLUSTER_OPT_FILENAME "filename"
#define GLUSTER_OPT_DEBUG "debug"
#define GLUSTER_DEBUG_DEFAULT 4
#define GLUSTER_DEBUG_MAX 9
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "gluster",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = GLUSTER_OPT_FILENAME,
.type = QEMU_OPT_STRING,
.help = "URL to the gluster image",
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
},
};
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -324,7 +674,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
BDRVGlusterState *s = bs->opaque;
int open_flags = 0;
int ret = 0;
GlusterConf *gconf = g_new0(GlusterConf, 1);
BlockdevOptionsGluster *gconf = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -347,8 +697,10 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
s->debug_level = GLUSTER_DEBUG_MAX;
}
gconf = g_new0(BlockdevOptionsGluster, 1);
gconf->debug_level = s->debug_level;
s->glfs = qemu_gluster_init(gconf, filename, errp);
gconf->has_debug_level = true;
s->glfs = qemu_gluster_init(gconf, filename, options, errp);
if (!s->glfs) {
ret = -errno;
goto out;
@@ -373,7 +725,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
s->fd = glfs_open(s->glfs, gconf->image, open_flags);
s->fd = glfs_open(s->glfs, gconf->path, open_flags);
if (!s->fd) {
ret = -errno;
}
@@ -382,7 +734,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
out:
qemu_opts_del(opts);
qemu_gluster_gconf_free(gconf);
qapi_free_BlockdevOptionsGluster(gconf);
if (!ret) {
return ret;
}
@@ -395,19 +747,13 @@ out:
return ret;
}
typedef struct BDRVGlusterReopenState {
struct glfs *glfs;
struct glfs_fd *fd;
} BDRVGlusterReopenState;
static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
int ret = 0;
BDRVGlusterState *s;
BDRVGlusterReopenState *reop_s;
GlusterConf *gconf = NULL;
BlockdevOptionsGluster *gconf;
int open_flags = 0;
assert(state != NULL);
@@ -420,10 +766,10 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
qemu_gluster_parse_flags(state->flags, &open_flags);
gconf = g_new0(GlusterConf, 1);
gconf = g_new0(BlockdevOptionsGluster, 1);
gconf->debug_level = s->debug_level;
reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
gconf->has_debug_level = true;
reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
if (reop_s->glfs == NULL) {
ret = -errno;
goto exit;
@@ -439,7 +785,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
}
#endif
reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
ret = -errno;
@@ -448,7 +794,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
exit:
/* state->opaque will be freed in either the _abort or _commit */
qemu_gluster_gconf_free(gconf);
qapi_free_BlockdevOptionsGluster(gconf);
return ret;
}
@@ -501,7 +847,9 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
#ifdef CONFIG_GLUSTERFS_ZEROFILL
static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int size, BdrvRequestFlags flags)
int64_t offset,
int size,
BdrvRequestFlags flags)
{
int ret;
GlusterAIOCB acb;
@@ -527,7 +875,7 @@ static inline bool gluster_supports_zerofill(void)
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
int64_t size)
{
return glfs_zerofill(fd, offset, size);
}
@@ -539,7 +887,7 @@ static inline bool gluster_supports_zerofill(void)
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
int64_t size)
{
return 0;
}
@@ -548,14 +896,15 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
static int qemu_gluster_create(const char *filename,
QemuOpts *opts, Error **errp)
{
BlockdevOptionsGluster *gconf;
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
int64_t total_size = 0;
char *tmp = NULL;
GlusterConf *gconf = g_new0(GlusterConf, 1);
gconf = g_new0(BlockdevOptionsGluster, 1);
gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
GLUSTER_DEBUG_DEFAULT);
if (gconf->debug_level < 0) {
@@ -563,8 +912,9 @@ static int qemu_gluster_create(const char *filename,
} else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
gconf->debug_level = GLUSTER_DEBUG_MAX;
}
gconf->has_debug_level = true;
glfs = qemu_gluster_init(gconf, filename, errp);
glfs = qemu_gluster_init(gconf, filename, NULL, errp);
if (!glfs) {
ret = -errno;
goto out;
@@ -576,19 +926,17 @@ static int qemu_gluster_create(const char *filename,
tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
if (!tmp || !strcmp(tmp, "off")) {
prealloc = 0;
} else if (!strcmp(tmp, "full") &&
gluster_supports_zerofill()) {
} else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
prealloc = 1;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'"
" or GlusterFS doesn't support zerofill API",
tmp);
" or GlusterFS doesn't support zerofill API", tmp);
ret = -EINVAL;
goto out;
}
fd = glfs_creat(glfs, gconf->image,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
fd = glfs_creat(glfs, gconf->path,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
if (!fd) {
ret = -errno;
} else {
@@ -606,7 +954,7 @@ static int qemu_gluster_create(const char *filename,
}
out:
g_free(tmp);
qemu_gluster_gconf_free(gconf);
qapi_free_BlockdevOptionsGluster(gconf);
if (glfs) {
glfs_fini(glfs);
}
@@ -614,7 +962,8 @@ out:
}
static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov, int write)
{
int ret;
GlusterAIOCB acb;
@@ -629,10 +978,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
gluster_finish_aiocb, &acb);
} else {
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
gluster_finish_aiocb, &acb);
}
if (ret < 0) {
@@ -657,13 +1006,17 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
}
static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
int64_t sector_num,
int nb_sectors,
QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
}
static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
int64_t sector_num,
int nb_sectors,
QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
@@ -724,14 +1077,12 @@ error:
}
#ifdef CONFIG_GLUSTERFS_DISCARD
static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
int64_t offset, int size)
{
int ret;
GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb.size = 0;
acb.ret = 0;
@@ -934,34 +1285,11 @@ static int64_t coroutine_fn qemu_gluster_co_get_block_status(
}
static QemuOptsList qemu_gluster_create_opts = {
.name = "qemu-gluster-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
.desc = {
{
.name = BLOCK_OPT_SIZE,
.type = QEMU_OPT_SIZE,
.help = "Virtual disk size"
},
{
.name = BLOCK_OPT_PREALLOC,
.type = QEMU_OPT_STRING,
.help = "Preallocation mode (allowed values: off, full)"
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
}
};
static BlockDriver bdrv_gluster = {
.format_name = "gluster",
.protocol_name = "gluster",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -976,7 +1304,7 @@ static BlockDriver bdrv_gluster = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
@@ -989,7 +1317,7 @@ static BlockDriver bdrv_gluster_tcp = {
.format_name = "gluster",
.protocol_name = "gluster+tcp",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -1004,7 +1332,7 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
@@ -1032,7 +1360,7 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
@@ -1041,6 +1369,12 @@ static BlockDriver bdrv_gluster_unix = {
.create_opts = &qemu_gluster_create_opts,
};
/* rdma is deprecated (actually never supported for volfile fetch).
* Let's maintain it for the protocol compatibility, to make sure things
* won't break immediately. For now, gluster+rdma will fall back to gluster+tcp
* protocol with a warning.
* TODO: remove gluster+rdma interface support
*/
static BlockDriver bdrv_gluster_rdma = {
.format_name = "gluster",
.protocol_name = "gluster+rdma",
@@ -1060,7 +1394,7 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,

View File

@@ -33,14 +33,13 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
BdrvRequestFlags flags,
BlockCompletionFunc *cb,
void *opaque,
bool is_write);
static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
int64_t offset,
QEMUIOVector *qiov,
BdrvRequestFlags flags,
BlockCompletionFunc *cb,
void *opaque,
bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags);
@@ -541,17 +540,6 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
return 0;
}
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
return -EIO;
}
return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE);
}
typedef struct RwCo {
BdrvChild *child;
int64_t offset;
@@ -898,6 +886,19 @@ emulate_flags:
return ret;
}
static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov)
{
BlockDriver *drv = bs->drv;
if (!drv->bdrv_co_pwritev_compressed) {
return -ENOTSUP;
}
return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
@@ -971,21 +972,25 @@ err:
/*
* Forwards an already correctly aligned request to the BlockDriver. This
* handles copy on read and zeroing after EOF; any other features must be
* implemented by the caller.
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
int64_t total_bytes, max_bytes;
int ret;
int ret = 0;
uint64_t bytes_remaining = bytes;
int max_transfer;
assert(is_power_of_2(align));
assert((offset & (align - 1)) == 0);
assert((bytes & (align - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
/* TODO: We would need a per-BDS .supported_read_flags and
* potential fallback support, if we ever implement any read flags
@@ -1024,7 +1029,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
}
/* Forward the request to the BlockDriver */
/* Forward the request to the BlockDriver, possibly fragmenting it */
total_bytes = bdrv_getlength(bs);
if (total_bytes < 0) {
ret = total_bytes;
@@ -1032,30 +1037,39 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
if (bytes <= max_bytes) {
if (bytes <= max_bytes && bytes <= max_transfer) {
ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
} else if (max_bytes > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, qiov->niov);
qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes);
ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0);
qemu_iovec_destroy(&local_qiov);
} else {
ret = 0;
goto out;
}
/* Reading beyond end of file is supposed to produce zeroes */
if (ret == 0 && total_bytes < offset + bytes) {
uint64_t zero_offset = MAX(0, total_bytes - offset);
uint64_t zero_bytes = offset + bytes - zero_offset;
qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes);
while (bytes_remaining) {
int num;
if (max_bytes) {
QEMUIOVector local_qiov;
num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
assert(num);
qemu_iovec_init(&local_qiov, qiov->niov);
qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
num, &local_qiov, 0);
max_bytes -= num;
qemu_iovec_destroy(&local_qiov);
} else {
num = bytes_remaining;
ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
bytes_remaining);
}
if (ret < 0) {
goto out;
}
bytes_remaining -= num;
}
out:
return ret;
return ret < 0 ? ret : 0;
}
/*
@@ -1168,10 +1182,11 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
bs->bl.request_alignment);
assert(is_power_of_2(alignment));
head = offset & (alignment - 1);
tail = (offset + count) & (alignment - 1);
max_write_zeroes &= ~(alignment - 1);
assert(alignment % bs->bl.request_alignment == 0);
head = offset % alignment;
tail = (offset + count) % alignment;
max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
assert(max_write_zeroes >= bs->bl.request_alignment);
while (count > 0 && !ret) {
int num = count;
@@ -1256,7 +1271,8 @@ fail:
}
/*
* Forwards an already correctly aligned write request to the BlockDriver.
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
@@ -1268,6 +1284,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
int64_t start_sector = offset >> BDRV_SECTOR_BITS;
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
uint64_t bytes_remaining = bytes;
int max_transfer;
assert(is_power_of_2(align));
assert((offset & (align - 1)) == 0);
@@ -1275,6 +1293,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
assert(!(flags & ~BDRV_REQ_MASK));
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
@@ -1297,9 +1317,36 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
} else {
} else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
} else if (bytes <= max_transfer) {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
} else {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
while (bytes_remaining) {
int num = MIN(bytes_remaining, max_transfer);
QEMUIOVector local_qiov;
int local_flags = flags;
assert(num);
if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
!(bs->supported_write_flags & BDRV_REQ_FUA)) {
/* If FUA is going to be emulated by flush, we only
* need to flush on the last iteration */
local_flags &= ~BDRV_REQ_FUA;
}
qemu_iovec_init(&local_qiov, qiov->niov);
qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
num, &local_qiov, local_flags);
qemu_iovec_destroy(&local_qiov);
if (ret < 0) {
break;
}
bytes_remaining -= num;
}
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
@@ -1312,6 +1359,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
if (ret >= 0) {
bs->total_sectors = MAX(bs->total_sectors, end_sector);
ret = 0;
}
return ret;
@@ -1835,28 +1883,6 @@ int bdrv_is_allocated_above(BlockDriverState *top,
return 0;
}
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
BlockDriver *drv = bs->drv;
int ret;
if (!drv) {
return -ENOMEDIUM;
}
if (!drv->bdrv_write_compressed) {
return -ENOTSUP;
}
ret = bdrv_check_request(bs, sector_num, nb_sectors);
if (ret < 0) {
return ret;
}
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
typedef struct BdrvVmstateCo {
BlockDriverState *bs;
QEMUIOVector *qiov;
@@ -1971,8 +1997,9 @@ BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num,
{
trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
return bdrv_co_aio_rw_vector(child, sector_num, qiov, nb_sectors, 0,
cb, opaque, false);
assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
0, cb, opaque, false);
}
BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
@@ -1981,8 +2008,9 @@ BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
{
trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
return bdrv_co_aio_rw_vector(child, sector_num, qiov, nb_sectors, 0,
cb, opaque, true);
assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
0, cb, opaque, true);
}
void bdrv_aio_cancel(BlockAIOCB *acb)
@@ -2018,8 +2046,8 @@ typedef struct BlockRequest {
union {
/* Used during read, write, trim */
struct {
int64_t sector;
int nb_sectors;
int64_t offset;
int bytes;
int flags;
QEMUIOVector *qiov;
};
@@ -2083,24 +2111,23 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
BlockAIOCBCoroutine *acb = opaque;
if (!acb->is_write) {
acb->req.error = bdrv_co_do_readv(acb->child, acb->req.sector,
acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
acb->req.error = bdrv_co_preadv(acb->child, acb->req.offset,
acb->req.qiov->size, acb->req.qiov, acb->req.flags);
} else {
acb->req.error = bdrv_co_do_writev(acb->child, acb->req.sector,
acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
acb->req.error = bdrv_co_pwritev(acb->child, acb->req.offset,
acb->req.qiov->size, acb->req.qiov, acb->req.flags);
}
bdrv_co_complete(acb);
}
static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
BdrvRequestFlags flags,
BlockCompletionFunc *cb,
void *opaque,
bool is_write)
static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
int64_t offset,
QEMUIOVector *qiov,
BdrvRequestFlags flags,
BlockCompletionFunc *cb,
void *opaque,
bool is_write)
{
Coroutine *co;
BlockAIOCBCoroutine *acb;
@@ -2109,8 +2136,7 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child,
acb->child = child;
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
acb->req.sector = sector_num;
acb->req.nb_sectors = nb_sectors;
acb->req.offset = offset;
acb->req.qiov = qiov;
acb->req.flags = flags;
acb->is_write = is_write;
@@ -2150,30 +2176,29 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
return &acb->common;
}
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque)
{
BlockAIOCBCoroutine *acb = opaque;
BlockDriverState *bs = acb->common.bs;
acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
acb->req.error = bdrv_co_pdiscard(bs, acb->req.offset, acb->req.bytes);
bdrv_co_complete(acb);
}
BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
Coroutine *co;
BlockAIOCBCoroutine *acb;
trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
trace_bdrv_aio_pdiscard(bs, offset, count, opaque);
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
acb->req.sector = sector_num;
acb->req.nb_sectors = nb_sectors;
co = qemu_coroutine_create(bdrv_aio_discard_co_entry, acb);
acb->req.offset = offset;
acb->req.bytes = count;
co = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb);
qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
@@ -2240,11 +2265,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
int current_gen = bs->write_gen;
/* Wait until any previous flushes are completed */
while (bs->flush_started_gen != bs->flushed_gen) {
while (bs->active_flush_req != NULL) {
qemu_co_queue_wait(&bs->flush_queue);
}
bs->flush_started_gen = current_gen;
bs->active_flush_req = &req;
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
@@ -2314,7 +2339,9 @@ flush_parent:
out:
/* Notify any pending flushes that we have completed */
bs->flushed_gen = current_gen;
qemu_co_queue_restart_all(&bs->flush_queue);
bs->active_flush_req = NULL;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
tracked_request_end(&req);
return ret;
@@ -2346,28 +2373,29 @@ int bdrv_flush(BlockDriverState *bs)
typedef struct DiscardCo {
BlockDriverState *bs;
int64_t sector_num;
int nb_sectors;
int64_t offset;
int count;
int ret;
} DiscardCo;
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
DiscardCo *rwco = opaque;
rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->count);
}
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
int count)
{
BdrvTrackedRequest req;
int max_discard, ret;
int max_pdiscard, ret;
int head, align;
if (!bs->drv) {
return -ENOMEDIUM;
}
ret = bdrv_check_request(bs, sector_num, nb_sectors);
ret = bdrv_check_byte_request(bs, offset, count);
if (ret < 0) {
return ret;
} else if (bs->read_only) {
@@ -2380,50 +2408,49 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return 0;
}
if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
return 0;
}
tracked_request_begin(&req, bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors << BDRV_SECTOR_BITS, BDRV_TRACKED_DISCARD);
/* Discard is advisory, so ignore any unaligned head or tail */
align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
assert(align % bs->bl.request_alignment == 0);
head = offset % align;
if (head) {
head = MIN(count, align - head);
count -= head;
offset += head;
}
count = QEMU_ALIGN_DOWN(count, align);
if (!count) {
return 0;
}
tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
if (ret < 0) {
goto out;
}
max_discard = MIN_NON_ZERO(bs->bl.max_pdiscard >> BDRV_SECTOR_BITS,
BDRV_REQUEST_MAX_SECTORS);
while (nb_sectors > 0) {
max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
align);
assert(max_pdiscard);
while (count > 0) {
int ret;
int num = nb_sectors;
int discard_alignment = bs->bl.pdiscard_alignment >> BDRV_SECTOR_BITS;
int num = MIN(count, max_pdiscard);
/* align request */
if (discard_alignment &&
num >= discard_alignment &&
sector_num % discard_alignment) {
if (num > discard_alignment) {
num = discard_alignment;
}
num -= sector_num % discard_alignment;
}
/* limit request size */
if (num > max_discard) {
num = max_discard;
}
if (bs->drv->bdrv_co_discard) {
ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
if (bs->drv->bdrv_co_pdiscard) {
ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
} else {
BlockAIOCB *acb;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
bdrv_co_io_em_complete, &co);
acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
bdrv_co_io_em_complete, &co);
if (acb == NULL) {
ret = -EIO;
goto out;
@@ -2436,8 +2463,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
goto out;
}
sector_num += num;
nb_sectors -= num;
offset += num;
count -= num;
}
ret = 0;
out:
@@ -2448,23 +2475,23 @@ out:
return ret;
}
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
Coroutine *co;
DiscardCo rwco = {
.bs = bs,
.sector_num = sector_num,
.nb_sectors = nb_sectors,
.offset = offset,
.count = count,
.ret = NOT_DONE,
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_discard_co_entry(&rwco);
bdrv_pdiscard_co_entry(&rwco);
} else {
AioContext *aio_context = bdrv_get_aio_context(bs);
co = qemu_coroutine_create(bdrv_discard_co_entry, &rwco);
co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
qemu_coroutine_enter(co);
while (rwco.ret == NOT_DONE) {
aio_poll(aio_context, true);

View File

@@ -586,11 +586,8 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
return -EINVAL;
}
if (bs->bl.max_transfer &&
nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) {
error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
"of %" PRIu32 " bytes", nb_sectors, bs->bl.max_transfer);
return -EINVAL;
if (bs->bl.max_transfer) {
assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
lba = sector_qemu2lun(sector_num, iscsilun);
@@ -754,11 +751,8 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
return -EINVAL;
}
if (bs->bl.max_transfer &&
nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) {
error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
"of %" PRIu32 " bytes", nb_sectors, bs->bl.max_transfer);
return -EINVAL;
if (bs->bl.max_transfer) {
assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
/* if cache.direct is off and we have a valid entry in our allocation map
@@ -1048,29 +1042,26 @@ iscsi_getlength(BlockDriverState *bs)
}
static int
coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
assert(is_byte_request_lun_aligned(offset, count, iscsilun));
if (!iscsilun->lbp.lbpu) {
/* UNMAP is not supported by the target */
return 0;
}
list.lba = sector_qemu2lun(sector_num, iscsilun);
list.num = sector_qemu2lun(nb_sectors, iscsilun);
list.lba = offset / iscsilun->block_size;
list.num = count / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
iscsi_co_generic_cb, &iTask) == NULL) {
iscsi_co_generic_cb, &iTask) == NULL) {
return -ENOMEM;
}
@@ -1100,7 +1091,8 @@ retry:
return iTask.err_code;
}
iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return 0;
}
@@ -2004,7 +1996,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_refresh_limits = iscsi_refresh_limits,
.bdrv_co_get_block_status = iscsi_co_get_block_status,
.bdrv_co_discard = iscsi_co_discard,
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev_flags = iscsi_co_writev_flags,

View File

@@ -221,7 +221,13 @@ static void ioq_submit(LinuxAioState *s)
break;
}
if (ret < 0) {
abort();
/* Fail the first request, retry the rest */
aiocb = QSIMPLEQ_FIRST(&s->io_q.pending);
QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
s->io_q.in_queue--;
aiocb->ret = ret;
qemu_laio_process_completion(aiocb);
continue;
}
s->io_q.in_flight += ret;

View File

@@ -23,7 +23,9 @@
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
#define DEFAULT_MIRROR_BUF_SIZE (10 << 20)
#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
#define DEFAULT_MIRROR_BUF_SIZE \
(MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
/* The mirroring buffer is a list of granularity-sized chunks.
* Free chunks are organized in a list.
@@ -58,9 +60,10 @@ typedef struct MirrorBlockJob {
QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
int buf_free_count;
uint64_t last_pause_ns;
unsigned long *in_flight_bitmap;
int in_flight;
int sectors_in_flight;
int64_t sectors_in_flight;
int ret;
bool unmap;
bool waiting_for_io;
@@ -303,8 +306,9 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
s->in_flight++;
s->sectors_in_flight += nb_sectors;
if (is_discard) {
blk_aio_discard(s->target, sector_num, op->nb_sectors,
mirror_write_complete, op);
blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS,
op->nb_sectors << BDRV_SECTOR_BITS,
mirror_write_complete, op);
} else {
blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
op->nb_sectors * BDRV_SECTOR_SIZE,
@@ -322,6 +326,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int nb_chunks = 1;
int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
MAX_IO_SECTORS);
sector_num = hbitmap_iter_next(&s->hbi);
if (sector_num < 0) {
@@ -372,7 +379,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int ret;
int io_sectors;
int io_sectors, io_sectors_acct;
BlockDriverState *file;
enum MirrorMethod {
MIRROR_METHOD_COPY,
@@ -385,7 +392,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * sectors_per_chunk,
&io_sectors, &file);
if (ret < 0) {
io_sectors = nb_chunks * sectors_per_chunk;
io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
} else if (ret & BDRV_BLOCK_DATA) {
io_sectors = MIN(io_sectors, max_io_sectors);
}
io_sectors -= io_sectors % sectors_per_chunk;
@@ -405,16 +414,30 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}
}
while (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
mirror_wait_for_io(s);
}
if (s->ret < 0) {
return 0;
}
mirror_clip_sectors(s, sector_num, &io_sectors);
switch (mirror_method) {
case MIRROR_METHOD_COPY:
io_sectors = mirror_do_read(s, sector_num, io_sectors);
io_sectors_acct = io_sectors;
break;
case MIRROR_METHOD_ZERO:
mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
break;
case MIRROR_METHOD_DISCARD:
mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
mirror_do_zero_or_discard(s, sector_num, io_sectors,
mirror_method == MIRROR_METHOD_DISCARD);
if (write_zeroes_ok) {
io_sectors_acct = 0;
} else {
io_sectors_acct = io_sectors;
}
break;
default:
abort();
@@ -423,7 +446,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
sector_num += io_sectors;
nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors);
delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
}
}
return delay_ns;
@@ -508,25 +531,97 @@ static void mirror_exit(BlockJob *job, void *opaque)
block_job_completed(&s->common, data->ret);
g_free(data);
bdrv_drained_end(src);
if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
aio_enable_external(iohandler_get_aio_context());
}
bdrv_unref(src);
}
static void mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (now - s->last_pause_ns > SLICE_TIME) {
s->last_pause_ns = now;
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
} else {
block_job_pause_point(&s->common);
}
}
static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t sector_num, end;
BlockDriverState *base = s->base;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
end = s->bdev_length / BDRV_SECTOR_SIZE;
if (base == NULL && !bdrv_has_zero_init(target_bs)) {
if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
return 0;
}
for (sector_num = 0; sector_num < end; ) {
int nb_sectors = MIN(end - sector_num,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
return 0;
}
if (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
mirror_wait_for_io(s);
continue;
}
mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
sector_num += nb_sectors;
}
mirror_drain(s);
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
for (sector_num = 0; sector_num < end; ) {
/* Just to make sure we are not exceeding int limit. */
int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
end - sector_num);
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
return 0;
}
ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
if (ret < 0) {
return ret;
}
assert(n > 0);
if (ret == 1) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
}
sector_num += n;
}
return 0;
}
static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *target_bs = blk_bs(s->target);
int64_t sector_num, end, length;
uint64_t last_pause_ns;
int64_t length;
BlockDriverInfo bdi;
char backing_filename[2]; /* we only need 2 characters because we are only
checking for a NULL string */
int ret = 0;
int n;
int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
@@ -568,7 +663,6 @@ static void coroutine_fn mirror_run(void *opaque)
s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
if (s->buf == NULL) {
ret = -ENOMEM;
@@ -577,47 +671,18 @@ static void coroutine_fn mirror_run(void *opaque)
mirror_free_init(s);
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (!s->is_none_mode) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base = s->base;
bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs);
for (sector_num = 0; sector_num < end; ) {
/* Just to make sure we are not exceeding int limit. */
int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
end - sector_num);
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (now - last_pause_ns > SLICE_TIME) {
last_pause_ns = now;
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
} else {
block_job_pause_point(&s->common);
}
if (block_job_is_cancelled(&s->common)) {
goto immediate_exit;
}
ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
if (ret < 0) {
goto immediate_exit;
}
assert(n > 0);
if (ret == 1 || mark_all_dirty) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
}
sector_num += n;
ret = mirror_dirty_init(s);
if (ret < 0 || block_job_is_cancelled(&s->common)) {
goto immediate_exit;
}
}
bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
for (;;) {
uint64_t delay_ns = 0;
int64_t cnt;
int64_t cnt, delta;
bool should_complete;
if (s->ret < 0) {
@@ -640,9 +705,10 @@ static void coroutine_fn mirror_run(void *opaque)
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
if (delta < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
mirror_wait_for_io(s);
@@ -710,7 +776,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.cancelled = false;
break;
}
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
immediate_exit:
@@ -734,12 +800,6 @@ immediate_exit:
/* Before we switch to target in mirror_exit, make sure data doesn't
* change. */
bdrv_drained_begin(bs);
if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
/* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
* above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
* need a block layer API change to achieve this. */
aio_disable_external(iohandler_get_aio_context());
}
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}

View File

@@ -116,7 +116,7 @@ static void nbd_restart_write(void *opaque)
static int nbd_co_send_request(BlockDriverState *bs,
struct nbd_request *request,
QEMUIOVector *qiov, int offset)
QEMUIOVector *qiov)
{
NbdClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
@@ -149,8 +149,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
offset, request->len, 0);
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
false);
if (ret != request->len) {
rc = -EIO;
}
@@ -167,8 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
}
static void nbd_co_receive_reply(NbdClientSession *s,
struct nbd_request *request, struct nbd_reply *reply,
QEMUIOVector *qiov, int offset)
struct nbd_request *request,
struct nbd_reply *reply,
QEMUIOVector *qiov)
{
int ret;
@@ -181,8 +182,8 @@ static void nbd_co_receive_reply(NbdClientSession *s,
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
offset, request->len, 1);
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
true);
if (ret != request->len) {
reply->error = EIO;
}
@@ -217,36 +218,41 @@ static void nbd_coroutine_end(NbdClientSession *s,
}
}
static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_READ };
struct nbd_request request = {
.type = NBD_CMD_READ,
.from = offset,
.len = bytes,
};
struct nbd_reply reply;
ssize_t ret;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
assert(bytes <= NBD_MAX_BUFFER_SIZE);
assert(!flags);
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL, 0);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, qiov, offset);
nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset, int flags)
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_WRITE };
struct nbd_request request = {
.type = NBD_CMD_WRITE,
.from = offset,
.len = bytes,
};
struct nbd_reply reply;
ssize_t ret;
@@ -255,55 +261,19 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
assert(bytes <= NBD_MAX_BUFFER_SIZE);
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, qiov, offset);
ret = nbd_co_send_request(bs, &request, qiov);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
}
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov, int flags)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
flags);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
}
int nbd_client_co_flush(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
@@ -319,36 +289,37 @@ int nbd_client_co_flush(BlockDriverState *bs)
request.len = 0;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL, 0);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_TRIM };
struct nbd_request request = {
.type = NBD_CMD_TRIM,
.from = offset,
.len = count,
};
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL, 0);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;

View File

@@ -20,7 +20,7 @@
typedef struct NbdClientSession {
QIOChannelSocket *sioc; /* The master data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
uint32_t nbdflags;
uint16_t nbdflags;
off_t size;
CoMutex send_mutex;
@@ -44,13 +44,12 @@ int nbd_client_init(BlockDriverState *bs,
Error **errp);
void nbd_client_close(BlockDriverState *bs);
int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov, int flags);
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
void nbd_client_detach_aio_context(BlockDriverState *bs);
void nbd_client_attach_aio_context(BlockDriverState *bs,

View File

@@ -42,6 +42,9 @@
typedef struct BDRVNBDState {
NbdClientSession client;
/* For nbd_refresh_filename() */
char *path, *host, *port, *export, *tlscredsid;
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
@@ -188,13 +191,15 @@ out:
g_free(file);
}
static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
Error **errp)
static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp)
{
SocketAddress *saddr;
if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
if (qdict_haskey(options, "path")) {
s->path = g_strdup(qemu_opt_get(opts, "path"));
s->host = g_strdup(qemu_opt_get(opts, "host"));
if (!s->path == !s->host) {
if (s->path) {
error_setg(errp, "path and host may not be used at the same time.");
} else {
error_setg(errp, "one of path and host must be specified.");
@@ -204,32 +209,28 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
saddr = g_new0(SocketAddress, 1);
if (qdict_haskey(options, "path")) {
if (s->path) {
UnixSocketAddress *q_unix;
saddr->type = SOCKET_ADDRESS_KIND_UNIX;
q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
q_unix->path = g_strdup(qdict_get_str(options, "path"));
qdict_del(options, "path");
q_unix->path = g_strdup(s->path);
} else {
InetSocketAddress *inet;
s->port = g_strdup(qemu_opt_get(opts, "port"));
saddr->type = SOCKET_ADDRESS_KIND_INET;
inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
inet->host = g_strdup(qdict_get_str(options, "host"));
if (!qdict_get_try_str(options, "port")) {
inet->host = g_strdup(s->host);
inet->port = g_strdup(s->port);
if (!inet->port) {
inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
} else {
inet->port = g_strdup(qdict_get_str(options, "port"));
}
qdict_del(options, "host");
qdict_del(options, "port");
}
s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
*export = g_strdup(qdict_get_try_str(options, "export"));
if (*export) {
qdict_del(options, "export");
}
s->export = g_strdup(qemu_opt_get(opts, "export"));
return saddr;
}
@@ -292,28 +293,66 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
}
static QemuOptsList nbd_runtime_opts = {
.name = "nbd",
.head = QTAILQ_HEAD_INITIALIZER(nbd_runtime_opts.head),
.desc = {
{
.name = "host",
.type = QEMU_OPT_STRING,
.help = "TCP host to connect to",
},
{
.name = "port",
.type = QEMU_OPT_STRING,
.help = "TCP port to connect to",
},
{
.name = "path",
.type = QEMU_OPT_STRING,
.help = "Unix socket path to connect to",
},
{
.name = "export",
.type = QEMU_OPT_STRING,
.help = "Name of the NBD export to open",
},
{
.name = "tls-creds",
.type = QEMU_OPT_STRING,
.help = "ID of the TLS credentials to use",
},
},
};
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVNBDState *s = bs->opaque;
char *export = NULL;
QemuOpts *opts = NULL;
Error *local_err = NULL;
QIOChannelSocket *sioc = NULL;
SocketAddress *saddr;
const char *tlscredsid;
SocketAddress *saddr = NULL;
QCryptoTLSCreds *tlscreds = NULL;
const char *hostname = NULL;
int ret = -EINVAL;
opts = qemu_opts_create(&nbd_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto error;
}
/* Pop the config into our state object. Exit if invalid. */
saddr = nbd_config(s, options, &export, errp);
saddr = nbd_config(s, opts, errp);
if (!saddr) {
goto error;
}
tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
if (tlscredsid) {
qdict_del(options, "tls-creds");
tlscreds = nbd_get_tls_creds(tlscredsid, errp);
s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
if (s->tlscredsid) {
tlscreds = nbd_get_tls_creds(s->tlscredsid, errp);
if (!tlscreds) {
goto error;
}
@@ -335,7 +374,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
}
/* NBD handshake */
ret = nbd_client_init(bs, sioc, export,
ret = nbd_client_init(bs, sioc, s->export,
tlscreds, hostname, errp);
error:
if (sioc) {
@@ -344,17 +383,18 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
if (tlscreds) {
object_unref(OBJECT(tlscreds));
}
if (ret < 0) {
g_free(s->path);
g_free(s->host);
g_free(s->port);
g_free(s->export);
g_free(s->tlscredsid);
}
qapi_free_SocketAddress(saddr);
g_free(export);
qemu_opts_del(opts);
return ret;
}
static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
}
static int nbd_co_flush(BlockDriverState *bs)
{
return nbd_client_co_flush(bs);
@@ -366,15 +406,17 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
}
static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
return nbd_client_co_discard(bs, sector_num, nb_sectors);
}
static void nbd_close(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
nbd_client_close(bs);
g_free(s->path);
g_free(s->host);
g_free(s->port);
g_free(s->export);
g_free(s->tlscredsid);
}
static int64_t nbd_getlength(BlockDriverState *bs)
@@ -397,48 +439,45 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVNBDState *s = bs->opaque;
QDict *opts = qdict_new();
const char *path = qdict_get_try_str(options, "path");
const char *host = qdict_get_try_str(options, "host");
const char *port = qdict_get_try_str(options, "port");
const char *export = qdict_get_try_str(options, "export");
const char *tlscreds = qdict_get_try_str(options, "tls-creds");
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
if (path && export) {
if (s->path && s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd+unix:///%s?socket=%s", export, path);
} else if (path && !export) {
"nbd+unix:///%s?socket=%s", s->export, s->path);
} else if (s->path && !s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd+unix://?socket=%s", path);
} else if (!path && export && port) {
"nbd+unix://?socket=%s", s->path);
} else if (!s->path && s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s:%s/%s", host, port, export);
} else if (!path && export && !port) {
"nbd://%s:%s/%s", s->host, s->port, s->export);
} else if (!s->path && s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s/%s", host, export);
} else if (!path && !export && port) {
"nbd://%s/%s", s->host, s->export);
} else if (!s->path && !s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s:%s", host, port);
} else if (!path && !export && !port) {
"nbd://%s:%s", s->host, s->port);
} else if (!s->path && !s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s", host);
"nbd://%s", s->host);
}
if (path) {
qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
} else if (port) {
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
if (s->path) {
qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path)));
} else if (s->port) {
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port)));
} else {
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
}
if (export) {
qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
if (s->export) {
qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
}
if (tlscreds) {
qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
if (s->tlscredsid) {
qdict_put_obj(opts, "tls-creds",
QOBJECT(qstring_from_str(s->tlscredsid)));
}
bs->full_open_options = opts;
@@ -450,11 +489,11 @@ static BlockDriver bdrv_nbd = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
@@ -468,11 +507,11 @@ static BlockDriver bdrv_nbd_tcp = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
@@ -486,11 +525,11 @@ static BlockDriver bdrv_nbd_unix = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,

View File

@@ -43,6 +43,7 @@
#define HEADER_MAGIC2 "WithouFreSpacExt"
#define HEADER_VERSION 2
#define HEADER_INUSE_MAGIC (0x746F6E59)
#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */
@@ -475,6 +476,10 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
BDRV_SECTOR_SIZE);
cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
error_propagate(errp, local_err);
return -E2BIG;
}
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {

View File

@@ -913,77 +913,34 @@ static int qcow_make_empty(BlockDriverState *bs)
return 0;
}
typedef struct QcowWriteCo {
BlockDriverState *bs;
int64_t sector_num;
const uint8_t *buf;
int nb_sectors;
int ret;
} QcowWriteCo;
static void qcow_write_co_entry(void *opaque)
{
QcowWriteCo *co = opaque;
QEMUIOVector qiov;
struct iovec iov = (struct iovec) {
.iov_base = (uint8_t*) co->buf,
.iov_len = co->nb_sectors * BDRV_SECTOR_SIZE,
};
qemu_iovec_init_external(&qiov, &iov, 1);
co->ret = qcow_co_writev(co->bs, co->sector_num, co->nb_sectors, &qiov);
}
/* Wrapper for non-coroutine contexts */
static int qcow_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
Coroutine *co;
AioContext *aio_context = bdrv_get_aio_context(bs);
QcowWriteCo data = {
.bs = bs,
.sector_num = sector_num,
.buf = buf,
.nb_sectors = nb_sectors,
.ret = -EINPROGRESS,
};
co = qemu_coroutine_create(qcow_write_co_entry, &data);
qemu_coroutine_enter(co);
while (data.ret == -EINPROGRESS) {
aio_poll(aio_context, true);
}
return data.ret;
}
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
static coroutine_fn int
qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
QEMUIOVector hd_qiov;
struct iovec iov;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
uint8_t *buf, *out_buf;
uint64_t cluster_offset;
if (nb_sectors != s->cluster_sectors) {
ret = -EINVAL;
/* Zero-pad last write if image size is not cluster aligned */
if (sector_num + nb_sectors == bs->total_sectors &&
nb_sectors < s->cluster_sectors) {
uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
memset(pad_buf, 0, s->cluster_size);
memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
ret = qcow_write_compressed(bs, sector_num,
pad_buf, s->cluster_sectors);
qemu_vfree(pad_buf);
buf = qemu_blockalign(bs, s->cluster_size);
if (bytes != s->cluster_size) {
if (bytes > s->cluster_size ||
offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
{
qemu_vfree(buf);
return -EINVAL;
}
return ret;
/* Zero-pad last write if image size is not cluster aligned */
memset(buf + bytes, 0, s->cluster_size - bytes);
}
qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -1012,27 +969,35 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
ret = qcow_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
} else {
cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
out_len, 0, 0);
if (cluster_offset == 0) {
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS,
bytes >> BDRV_SECTOR_BITS, qiov);
if (ret < 0) {
goto fail;
}
goto success;
}
qemu_co_mutex_lock(&s->lock);
cluster_offset = get_cluster_offset(bs, offset, 2, out_len, 0, 0);
qemu_co_mutex_unlock(&s->lock);
if (cluster_offset == 0) {
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
iov = (struct iovec) {
.iov_base = out_buf,
.iov_len = out_len,
};
qemu_iovec_init_external(&hd_qiov, &iov, 1);
ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
if (ret < 0) {
goto fail;
}
success:
ret = 0;
fail:
qemu_vfree(buf);
g_free(out_buf);
return ret;
}
@@ -1085,7 +1050,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
.bdrv_write_compressed = qcow_write_compressed,
.bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
.bdrv_get_info = qcow_get_info,
.create_opts = &qcow_create_opts,

View File

@@ -615,9 +615,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
/* Discard is optional, ignore the return value */
if (ret >= 0) {
bdrv_discard(bs->file->bs,
d->offset >> BDRV_SECTOR_BITS,
d->bytes >> BDRV_SECTOR_BITS);
bdrv_pdiscard(bs->file->bs, d->offset, d->bytes);
}
g_free(d);

View File

@@ -2479,15 +2479,15 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
return ret;
}
static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
int ret;
BDRVQcow2State *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors, QCOW2_DISCARD_REQUEST, false);
ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
QCOW2_DISCARD_REQUEST, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -2533,86 +2533,41 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
return 0;
}
typedef struct Qcow2WriteCo {
BlockDriverState *bs;
int64_t sector_num;
const uint8_t *buf;
int nb_sectors;
int ret;
} Qcow2WriteCo;
static void qcow2_write_co_entry(void *opaque)
{
Qcow2WriteCo *co = opaque;
QEMUIOVector qiov;
uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
struct iovec iov = (struct iovec) {
.iov_base = (uint8_t*) co->buf,
.iov_len = bytes,
};
qemu_iovec_init_external(&qiov, &iov, 1);
co->ret = qcow2_co_pwritev(co->bs, offset, bytes, &qiov, 0);
}
/* Wrapper for non-coroutine contexts */
static int qcow2_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
Coroutine *co;
AioContext *aio_context = bdrv_get_aio_context(bs);
Qcow2WriteCo data = {
.bs = bs,
.sector_num = sector_num,
.buf = buf,
.nb_sectors = nb_sectors,
.ret = -EINPROGRESS,
};
co = qemu_coroutine_create(qcow2_write_co_entry, &data);
qemu_coroutine_enter(co);
while (data.ret == -EINPROGRESS) {
aio_poll(aio_context, true);
}
return data.ret;
}
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
static coroutine_fn int
qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov)
{
BDRVQcow2State *s = bs->opaque;
QEMUIOVector hd_qiov;
struct iovec iov;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
uint8_t *buf, *out_buf;
uint64_t cluster_offset;
if (nb_sectors == 0) {
if (bytes == 0) {
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file->bs);
return bdrv_truncate(bs->file->bs, cluster_offset);
}
if (nb_sectors != s->cluster_sectors) {
ret = -EINVAL;
/* Zero-pad last write if image size is not cluster aligned */
if (sector_num + nb_sectors == bs->total_sectors &&
nb_sectors < s->cluster_sectors) {
uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
memset(pad_buf, 0, s->cluster_size);
memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
ret = qcow2_write_compressed(bs, sector_num,
pad_buf, s->cluster_sectors);
qemu_vfree(pad_buf);
buf = qemu_blockalign(bs, s->cluster_size);
if (bytes != s->cluster_size) {
if (bytes > s->cluster_size ||
offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
{
qemu_vfree(buf);
return -EINVAL;
}
return ret;
/* Zero-pad last write if image size is not cluster aligned */
memset(buf + bytes, 0, s->cluster_size - bytes);
}
qemu_iovec_to_buf(qiov, 0, buf, bytes);
out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -2641,33 +2596,44 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
ret = qcow2_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
} else {
cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
sector_num << 9, out_len);
if (!cluster_offset) {
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
if (ret < 0) {
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
if (ret < 0) {
goto fail;
}
goto success;
}
qemu_co_mutex_lock(&s->lock);
cluster_offset =
qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
if (!cluster_offset) {
qemu_co_mutex_unlock(&s->lock);
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
goto fail;
}
iov = (struct iovec) {
.iov_base = out_buf,
.iov_len = out_len,
};
qemu_iovec_init_external(&hd_qiov, &iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
if (ret < 0) {
goto fail;
}
success:
ret = 0;
fail:
qemu_vfree(buf);
g_free(out_buf);
return ret;
}
@@ -3410,9 +3376,9 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
.bdrv_co_discard = qcow2_co_discard,
.bdrv_co_pdiscard = qcow2_co_pdiscard,
.bdrv_truncate = qcow2_truncate,
.bdrv_write_compressed = qcow2_write_compressed,
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
.bdrv_make_empty = qcow2_make_empty,
.bdrv_snapshot_create = qcow2_snapshot_create,

View File

@@ -1214,7 +1214,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
@@ -1224,8 +1224,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
acb->aio_nbytes = count;
acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
@@ -1233,7 +1233,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
assert(qiov->size == acb->aio_nbytes);
}
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
@@ -1786,13 +1786,13 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
return ret | BDRV_BLOCK_OFFSET_VALID | start;
}
static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD);
}
@@ -1864,7 +1864,7 @@ BlockDriver bdrv_file = {
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_aio_pdiscard = raw_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
@@ -2203,8 +2203,8 @@ static int fd_open(BlockDriverState *bs)
return -EIO;
}
static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
@@ -2212,7 +2212,7 @@ static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
if (fd_open(bs) < 0) {
return NULL;
}
return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
@@ -2307,7 +2307,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_aio_pdiscard = hdev_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,

View File

@@ -142,7 +142,7 @@ static int aio_worker(void *arg)
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
@@ -155,11 +155,12 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
assert(qiov->size == count);
}
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
acb->aio_nbytes = count;
acb->aio_offset = offset;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
@@ -378,9 +379,10 @@ static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
nb_sectors, cb, opaque, QEMU_AIO_READ);
nb_sectors, cb, opaque, QEMU_AIO_READ);
} else {
return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_READ);
}
}
@@ -392,9 +394,10 @@ static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
nb_sectors, cb, opaque, QEMU_AIO_WRITE);
nb_sectors, cb, opaque, QEMU_AIO_WRITE);
} else {
return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_WRITE);
}
}

View File

@@ -50,33 +50,30 @@ static int raw_reopen_prepare(BDRVReopenState *reopen_state,
return 0;
}
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
static int coroutine_fn
raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov, int flags)
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
void *buf = NULL;
BlockDriver *drv;
QEMUIOVector local_qiov;
int ret;
if (bs->probed && sector_num == 0) {
/* As long as these conditions are true, we can't get partial writes to
* the probe buffer and can just directly check the request. */
if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
/* Handling partial writes would be a pain - so we just
* require that guests have 512-byte request alignment if
* probing occurred */
QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
if (nb_sectors == 0) {
/* qemu_iovec_to_buf() would fail, but we want to return success
* instead of -EINVAL in this case. */
return 0;
}
assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
buf = qemu_try_blockalign(bs->file->bs, 512);
if (!buf) {
@@ -105,8 +102,7 @@ raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
ret = bdrv_co_pwritev(bs->file, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
fail:
if (qiov == &local_qiov) {
@@ -134,10 +130,10 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
return bdrv_co_pdiscard(bs->file->bs, offset, count);
}
static int64_t raw_getlength(BlockDriverState *bs)
@@ -150,6 +146,16 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return bdrv_get_info(bs->file->bs, bdi);
}
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
if (bs->probed) {
/* To make it easier to protect the first sector, any probed
* image is restricted to read-modify-write on sub-sector
* operations. */
bs->bl.request_alignment = BDRV_SECTOR_SIZE;
}
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file->bs, offset);
@@ -192,8 +198,10 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->bs->sg;
bs->supported_write_flags = BDRV_REQ_FUA;
bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP;
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags;
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
@@ -238,15 +246,16 @@ BlockDriver bdrv_raw = {
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev_flags = &raw_co_writev_flags,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_pdiscard = &raw_co_pdiscard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
.bdrv_get_info = &raw_get_info,
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_probe_blocksizes = &raw_probe_blocksizes,
.bdrv_probe_geometry = &raw_probe_geometry,
.bdrv_media_changed = &raw_media_changed,

View File

@@ -649,9 +649,9 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
}
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
int64_t sector_num,
int64_t off,
QEMUIOVector *qiov,
int nb_sectors,
int64_t size,
BlockCompletionFunc *cb,
void *opaque,
RBDAIOCmd cmd)
@@ -659,7 +659,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
int64_t off, size;
char *buf;
int r;
@@ -668,6 +667,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
acb->cmd = cmd;
acb->qiov = qiov;
assert(!qiov || qiov->size == size);
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
@@ -687,9 +687,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
buf = acb->bounce;
off = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
rcb = g_new(RADOSCB, 1);
rcb->acb = acb;
rcb->buf = buf;
@@ -739,7 +736,8 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_READ);
}
@@ -750,7 +748,8 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_WRITE);
}
@@ -931,13 +930,13 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
}
#ifdef LIBRBD_SUPPORTS_DISCARD
static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
BlockCompletionFunc *cb,
void *opaque)
static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
int64_t offset,
int count,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
RBD_AIO_DISCARD);
}
#endif
@@ -1001,7 +1000,7 @@ static BlockDriver bdrv_rbd = {
#endif
#ifdef LIBRBD_SUPPORTS_DISCARD
.bdrv_aio_discard = qemu_rbd_aio_discard,
.bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
#endif
.bdrv_snapshot_create = qemu_rbd_snap_create,

View File

@@ -2800,8 +2800,8 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
}
static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
int count)
{
SheepdogAIOCB *acb;
BDRVSheepdogState *s = bs->opaque;
@@ -2811,7 +2811,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
uint32_t zero = 0;
if (!s->discard_supported) {
return 0;
return 0;
}
memset(&discard_iov, 0, sizeof(discard_iov));
@@ -2820,7 +2820,10 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
iov.iov_len = sizeof(zero);
discard_iov.iov = &iov;
discard_iov.niov = 1;
acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((count & (BDRV_SECTOR_SIZE - 1)) == 0);
acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
@@ -2954,7 +2957,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -2990,7 +2993,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -3026,7 +3029,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,

View File

@@ -508,36 +508,73 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
return ret;
}
static QemuOptsList ssh_runtime_opts = {
.name = "ssh",
.head = QTAILQ_HEAD_INITIALIZER(ssh_runtime_opts.head),
.desc = {
{
.name = "host",
.type = QEMU_OPT_STRING,
.help = "Host to connect to",
},
{
.name = "port",
.type = QEMU_OPT_NUMBER,
.help = "Port to connect to",
},
{
.name = "path",
.type = QEMU_OPT_STRING,
.help = "Path of the image on the host",
},
{
.name = "user",
.type = QEMU_OPT_STRING,
.help = "User as which to connect",
},
{
.name = "host_key_check",
.type = QEMU_OPT_STRING,
.help = "Defines how and what to check the host key against",
},
},
};
static int connect_to_ssh(BDRVSSHState *s, QDict *options,
int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
QemuOpts *opts = NULL;
Error *local_err = NULL;
const char *host, *user, *path, *host_key_check;
int port;
if (!qdict_haskey(options, "host")) {
opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
ret = -EINVAL;
error_propagate(errp, local_err);
goto err;
}
host = qemu_opt_get(opts, "host");
if (!host) {
ret = -EINVAL;
error_setg(errp, "No hostname was specified");
goto err;
}
host = qdict_get_str(options, "host");
if (qdict_haskey(options, "port")) {
port = qdict_get_int(options, "port");
} else {
port = 22;
}
port = qemu_opt_get_number(opts, "port", 22);
if (!qdict_haskey(options, "path")) {
path = qemu_opt_get(opts, "path");
if (!path) {
ret = -EINVAL;
error_setg(errp, "No path was specified");
goto err;
}
path = qdict_get_str(options, "path");
if (qdict_haskey(options, "user")) {
user = qdict_get_str(options, "user");
} else {
user = qemu_opt_get(opts, "user");
if (!user) {
user = g_get_user_name();
if (!user) {
error_setg_errno(errp, errno, "Can't get user name");
@@ -546,9 +583,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
}
}
if (qdict_haskey(options, "host_key_check")) {
host_key_check = qdict_get_str(options, "host_key_check");
} else {
host_key_check = qemu_opt_get(opts, "host_key_check");
if (!host_key_check) {
host_key_check = "yes";
}
@@ -612,21 +648,14 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
goto err;
}
qemu_opts_del(opts);
r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
if (r < 0) {
sftp_error_setg(errp, s, "failed to read file attributes");
return -EINVAL;
}
/* Delete the options we've used; any not deleted will cause the
* block layer to give an error about unused options.
*/
qdict_del(options, "host");
qdict_del(options, "port");
qdict_del(options, "user");
qdict_del(options, "path");
qdict_del(options, "host_key_check");
return 0;
err:
@@ -646,6 +675,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
}
s->session = NULL;
qemu_opts_del(opts);
return ret;
}

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# block.c
bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\""
@@ -9,7 +9,7 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags
blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
# block/io.c
bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p"
bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
@@ -58,7 +58,7 @@ qmp_block_stream(void *bs, void *job) "bs %p job %p"
# block/raw-win32.c
# block/raw-posix.c
paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
# block/qcow2.c
qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"

View File

@@ -1645,56 +1645,11 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
return ret;
}
typedef struct VmdkWriteCompressedCo {
BlockDriverState *bs;
int64_t sector_num;
const uint8_t *buf;
int nb_sectors;
int ret;
} VmdkWriteCompressedCo;
static void vmdk_co_write_compressed(void *opaque)
static int coroutine_fn
vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov)
{
VmdkWriteCompressedCo *co = opaque;
QEMUIOVector local_qiov;
uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
struct iovec iov = (struct iovec) {
.iov_base = (uint8_t*) co->buf,
.iov_len = bytes,
};
qemu_iovec_init_external(&local_qiov, &iov, 1);
co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
}
static int vmdk_write_compressed(BlockDriverState *bs,
int64_t sector_num,
const uint8_t *buf,
int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
if (s->num_extents == 1 && s->extents[0].compressed) {
Coroutine *co;
AioContext *aio_context = bdrv_get_aio_context(bs);
VmdkWriteCompressedCo data = {
.bs = bs,
.sector_num = sector_num,
.buf = buf,
.nb_sectors = nb_sectors,
.ret = -EINPROGRESS,
};
co = qemu_coroutine_create(vmdk_co_write_compressed, &data);
qemu_coroutine_enter(co);
while (data.ret == -EINPROGRESS) {
aio_poll(aio_context, true);
}
return data.ret;
} else {
return -ENOTSUP;
}
return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
}
static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
@@ -2393,7 +2348,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_write_compressed = vmdk_write_compressed,
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
.bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,

View File

@@ -145,7 +145,8 @@ void qmp_nbd_server_start(SocketAddress *addr,
void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs = NULL;
BlockBackend *on_eject_blk;
NBDExport *exp;
if (!nbd_server) {
@@ -158,26 +159,22 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
return;
}
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
return;
}
if (!blk_is_inserted(blk)) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
on_eject_blk = blk_by_name(device);
bs = bdrv_lookup_bs(device, device, errp);
if (!bs) {
return;
}
if (!has_writable) {
writable = false;
}
if (blk_is_read_only(blk)) {
if (bdrv_is_read_only(bs)) {
writable = false;
}
exp = nbd_export_new(blk, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL,
errp);
exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
NULL, false, on_eject_blk, errp);
if (!exp) {
return;
}

View File

@@ -1174,6 +1174,28 @@ fail:
return dinfo;
}
static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp)
{
BlockDriverState *bs;
bs = bdrv_lookup_bs(name, name, errp);
if (bs == NULL) {
return NULL;
}
if (!bdrv_is_root_node(bs)) {
error_setg(errp, "Need a root block node");
return NULL;
}
if (!bdrv_is_inserted(bs)) {
error_setg(errp, "Device has no medium");
return NULL;
}
return bs;
}
void hmp_commit(Monitor *mon, const QDict *qdict)
{
const char *device = qdict_get_str(qdict, "device");
@@ -1284,21 +1306,17 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
Error **errp)
{
BlockDriverState *bs;
BlockBackend *blk;
AioContext *aio_context;
QEMUSnapshotInfo sn;
Error *local_err = NULL;
SnapshotInfo *info = NULL;
int ret;
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
bs = qmp_get_root_bs(device, errp);
if (!bs) {
return NULL;
}
aio_context = blk_get_aio_context(blk);
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (!has_id) {
@@ -1314,12 +1332,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
goto out_aio_context;
}
if (!blk_is_available(blk)) {
error_setg(errp, "Device '%s' has no medium", device);
goto out_aio_context;
}
bs = blk_bs(blk);
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
goto out_aio_context;
}
@@ -1499,7 +1511,6 @@ static void internal_snapshot_prepare(BlkActionState *common,
Error *local_err = NULL;
const char *device;
const char *name;
BlockBackend *blk;
BlockDriverState *bs;
QEMUSnapshotInfo old_sn, *sn;
bool ret;
@@ -1522,23 +1533,15 @@ static void internal_snapshot_prepare(BlkActionState *common,
return;
}
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
bs = qmp_get_root_bs(device, errp);
if (!bs) {
return;
}
/* AioContext is released in .clean() */
state->aio_context = blk_get_aio_context(blk);
state->aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(state->aio_context);
if (!blk_is_available(blk)) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
return;
}
bs = blk_bs(blk);
state->bs = bs;
bdrv_drained_begin(bs);
@@ -1838,56 +1841,31 @@ typedef struct DriveBackupState {
BlockJob *job;
} DriveBackupState;
static void do_drive_backup(const char *job_id, const char *device,
const char *target, bool has_format,
const char *format, enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
bool has_bitmap, const char *bitmap,
bool has_on_source_error,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
BlockJobTxn *txn, Error **errp);
static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
Error **errp);
static void drive_backup_prepare(BlkActionState *common, Error **errp)
{
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
BlockBackend *blk;
BlockDriverState *bs;
DriveBackup *backup;
Error *local_err = NULL;
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
backup = common->action->u.drive_backup.data;
blk = blk_by_name(backup->device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", backup->device);
return;
}
if (!blk_is_available(blk)) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
bs = qmp_get_root_bs(backup->device, errp);
if (!bs) {
return;
}
/* AioContext is released in .clean() */
state->aio_context = blk_get_aio_context(blk);
state->aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(state->aio_context);
bdrv_drained_begin(blk_bs(blk));
state->bs = blk_bs(blk);
bdrv_drained_begin(bs);
state->bs = bs;
do_drive_backup(backup->has_job_id ? backup->job_id : NULL,
backup->device, backup->target,
backup->has_format, backup->format,
backup->sync,
backup->has_mode, backup->mode,
backup->has_speed, backup->speed,
backup->has_bitmap, backup->bitmap,
backup->has_on_source_error, backup->on_source_error,
backup->has_on_target_error, backup->on_target_error,
common->block_job_txn, &local_err);
do_drive_backup(backup, common->block_job_txn, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -1924,59 +1902,41 @@ typedef struct BlockdevBackupState {
AioContext *aio_context;
} BlockdevBackupState;
static void do_blockdev_backup(const char *job_id, const char *device,
const char *target, enum MirrorSyncMode sync,
bool has_speed, int64_t speed,
bool has_on_source_error,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
BlockJobTxn *txn, Error **errp);
static void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
Error **errp);
static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
{
BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
BlockdevBackup *backup;
BlockBackend *blk, *target;
BlockDriverState *bs, *target;
Error *local_err = NULL;
assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
backup = common->action->u.blockdev_backup.data;
blk = blk_by_name(backup->device);
if (!blk) {
error_setg(errp, "Device '%s' not found", backup->device);
bs = qmp_get_root_bs(backup->device, errp);
if (!bs) {
return;
}
if (!blk_is_available(blk)) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
return;
}
target = blk_by_name(backup->target);
target = bdrv_lookup_bs(backup->target, backup->target, errp);
if (!target) {
error_setg(errp, "Device '%s' not found", backup->target);
return;
}
/* AioContext is released in .clean() */
state->aio_context = blk_get_aio_context(blk);
if (state->aio_context != blk_get_aio_context(target)) {
state->aio_context = bdrv_get_aio_context(bs);
if (state->aio_context != bdrv_get_aio_context(target)) {
state->aio_context = NULL;
error_setg(errp, "Backup between two IO threads is not implemented");
return;
}
aio_context_acquire(state->aio_context);
state->bs = blk_bs(blk);
state->bs = bs;
bdrv_drained_begin(state->bs);
do_blockdev_backup(backup->has_job_id ? backup->job_id : NULL,
backup->device, backup->target, backup->sync,
backup->has_speed, backup->speed,
backup->has_on_source_error, backup->on_source_error,
backup->has_on_target_error, backup->on_target_error,
common->block_job_txn, &local_err);
do_blockdev_backup(backup, common->block_job_txn, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -2634,49 +2594,17 @@ fail:
}
/* throttling disk I/O limits */
void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
int64_t bps_wr,
int64_t iops,
int64_t iops_rd,
int64_t iops_wr,
bool has_bps_max,
int64_t bps_max,
bool has_bps_rd_max,
int64_t bps_rd_max,
bool has_bps_wr_max,
int64_t bps_wr_max,
bool has_iops_max,
int64_t iops_max,
bool has_iops_rd_max,
int64_t iops_rd_max,
bool has_iops_wr_max,
int64_t iops_wr_max,
bool has_bps_max_length,
int64_t bps_max_length,
bool has_bps_rd_max_length,
int64_t bps_rd_max_length,
bool has_bps_wr_max_length,
int64_t bps_wr_max_length,
bool has_iops_max_length,
int64_t iops_max_length,
bool has_iops_rd_max_length,
int64_t iops_rd_max_length,
bool has_iops_wr_max_length,
int64_t iops_wr_max_length,
bool has_iops_size,
int64_t iops_size,
bool has_group,
const char *group, Error **errp)
void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
{
ThrottleConfig cfg;
BlockDriverState *bs;
BlockBackend *blk;
AioContext *aio_context;
blk = blk_by_name(device);
blk = blk_by_name(arg->device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
"Device '%s' not found", arg->device);
return;
}
@@ -2685,59 +2613,59 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
bs = blk_bs(blk);
if (!bs) {
error_setg(errp, "Device '%s' has no medium", device);
error_setg(errp, "Device '%s' has no medium", arg->device);
goto out;
}
throttle_config_init(&cfg);
cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
cfg.buckets[THROTTLE_BPS_READ].avg = bps_rd;
cfg.buckets[THROTTLE_BPS_WRITE].avg = bps_wr;
cfg.buckets[THROTTLE_BPS_TOTAL].avg = arg->bps;
cfg.buckets[THROTTLE_BPS_READ].avg = arg->bps_rd;
cfg.buckets[THROTTLE_BPS_WRITE].avg = arg->bps_wr;
cfg.buckets[THROTTLE_OPS_TOTAL].avg = iops;
cfg.buckets[THROTTLE_OPS_READ].avg = iops_rd;
cfg.buckets[THROTTLE_OPS_WRITE].avg = iops_wr;
cfg.buckets[THROTTLE_OPS_TOTAL].avg = arg->iops;
cfg.buckets[THROTTLE_OPS_READ].avg = arg->iops_rd;
cfg.buckets[THROTTLE_OPS_WRITE].avg = arg->iops_wr;
if (has_bps_max) {
cfg.buckets[THROTTLE_BPS_TOTAL].max = bps_max;
if (arg->has_bps_max) {
cfg.buckets[THROTTLE_BPS_TOTAL].max = arg->bps_max;
}
if (has_bps_rd_max) {
cfg.buckets[THROTTLE_BPS_READ].max = bps_rd_max;
if (arg->has_bps_rd_max) {
cfg.buckets[THROTTLE_BPS_READ].max = arg->bps_rd_max;
}
if (has_bps_wr_max) {
cfg.buckets[THROTTLE_BPS_WRITE].max = bps_wr_max;
if (arg->has_bps_wr_max) {
cfg.buckets[THROTTLE_BPS_WRITE].max = arg->bps_wr_max;
}
if (has_iops_max) {
cfg.buckets[THROTTLE_OPS_TOTAL].max = iops_max;
if (arg->has_iops_max) {
cfg.buckets[THROTTLE_OPS_TOTAL].max = arg->iops_max;
}
if (has_iops_rd_max) {
cfg.buckets[THROTTLE_OPS_READ].max = iops_rd_max;
if (arg->has_iops_rd_max) {
cfg.buckets[THROTTLE_OPS_READ].max = arg->iops_rd_max;
}
if (has_iops_wr_max) {
cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max;
if (arg->has_iops_wr_max) {
cfg.buckets[THROTTLE_OPS_WRITE].max = arg->iops_wr_max;
}
if (has_bps_max_length) {
cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = bps_max_length;
if (arg->has_bps_max_length) {
cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = arg->bps_max_length;
}
if (has_bps_rd_max_length) {
cfg.buckets[THROTTLE_BPS_READ].burst_length = bps_rd_max_length;
if (arg->has_bps_rd_max_length) {
cfg.buckets[THROTTLE_BPS_READ].burst_length = arg->bps_rd_max_length;
}
if (has_bps_wr_max_length) {
cfg.buckets[THROTTLE_BPS_WRITE].burst_length = bps_wr_max_length;
if (arg->has_bps_wr_max_length) {
cfg.buckets[THROTTLE_BPS_WRITE].burst_length = arg->bps_wr_max_length;
}
if (has_iops_max_length) {
cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = iops_max_length;
if (arg->has_iops_max_length) {
cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = arg->iops_max_length;
}
if (has_iops_rd_max_length) {
cfg.buckets[THROTTLE_OPS_READ].burst_length = iops_rd_max_length;
if (arg->has_iops_rd_max_length) {
cfg.buckets[THROTTLE_OPS_READ].burst_length = arg->iops_rd_max_length;
}
if (has_iops_wr_max_length) {
cfg.buckets[THROTTLE_OPS_WRITE].burst_length = iops_wr_max_length;
if (arg->has_iops_wr_max_length) {
cfg.buckets[THROTTLE_OPS_WRITE].burst_length = arg->iops_wr_max_length;
}
if (has_iops_size) {
cfg.op_size = iops_size;
if (arg->has_iops_size) {
cfg.op_size = arg->iops_size;
}
if (!throttle_is_valid(&cfg, errp)) {
@@ -2748,9 +2676,10 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
/* Enable I/O limits if they're not enabled yet, otherwise
* just update the throttling group. */
if (!blk_get_public(blk)->throttle_state) {
blk_io_limits_enable(blk, has_group ? group : device);
} else if (has_group) {
blk_io_limits_update_group(blk, group);
blk_io_limits_enable(blk,
arg->has_group ? arg->group : arg->device);
} else if (arg->has_group) {
blk_io_limits_update_group(blk, arg->group);
}
/* Set the new throttling configuration */
blk_set_io_limits(blk, &cfg);
@@ -3014,7 +2943,6 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
bool has_on_error, BlockdevOnError on_error,
Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
BlockDriverState *base_bs = NULL;
AioContext *aio_context;
@@ -3025,22 +2953,14 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
on_error = BLOCKDEV_ON_ERROR_REPORT;
}
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
bs = qmp_get_root_bs(device, errp);
if (!bs) {
return;
}
aio_context = blk_get_aio_context(blk);
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (!blk_is_available(blk)) {
error_setg(errp, "Device '%s' has no medium", device);
goto out;
}
bs = blk_bs(blk);
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
goto out;
}
@@ -3086,7 +3006,6 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_speed, int64_t speed,
Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
BlockDriverState *base_bs, *top_bs;
AioContext *aio_context;
@@ -3105,22 +3024,22 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
* live commit feature versions; for this to work, we must make sure to
* perform the device lookup before any generic errors that may occur in a
* scenario in which all optional arguments are omitted. */
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
bs = qmp_get_root_bs(device, &local_err);
if (!bs) {
bs = bdrv_lookup_bs(device, device, NULL);
if (!bs) {
error_free(local_err);
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
} else {
error_propagate(errp, local_err);
}
return;
}
aio_context = blk_get_aio_context(blk);
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (!blk_is_available(blk)) {
error_setg(errp, "Device '%s' has no medium", device);
goto out;
}
bs = blk_bs(blk);
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
goto out;
}
@@ -3186,19 +3105,8 @@ out:
aio_context_release(aio_context);
}
static void do_drive_backup(const char *job_id, const char *device,
const char *target, bool has_format,
const char *format, enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
bool has_bitmap, const char *bitmap,
bool has_on_source_error,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
BlockJobTxn *txn, Error **errp)
static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
BlockDriverState *target_bs;
BlockDriverState *source = NULL;
@@ -3209,39 +3117,36 @@ static void do_drive_backup(const char *job_id, const char *device,
int flags;
int64_t size;
if (!has_speed) {
speed = 0;
if (!backup->has_speed) {
backup->speed = 0;
}
if (!has_on_source_error) {
on_source_error = BLOCKDEV_ON_ERROR_REPORT;
if (!backup->has_on_source_error) {
backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT;
}
if (!has_on_target_error) {
on_target_error = BLOCKDEV_ON_ERROR_REPORT;
if (!backup->has_on_target_error) {
backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT;
}
if (!has_mode) {
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
if (!backup->has_mode) {
backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
if (!backup->has_job_id) {
backup->job_id = NULL;
}
if (!backup->has_compress) {
backup->compress = false;
}
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
bs = qmp_get_root_bs(backup->device, errp);
if (!bs) {
return;
}
aio_context = blk_get_aio_context(blk);
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
/* Although backup_run has this check too, we need to use bs->drv below, so
* do an early check redundantly. */
if (!blk_is_available(blk)) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
goto out;
}
bs = blk_bs(blk);
if (!has_format) {
format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
if (!backup->has_format) {
backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ?
NULL : (char*) bs->drv->format_name;
}
/* Early check to avoid creating target */
@@ -3253,13 +3158,13 @@ static void do_drive_backup(const char *job_id, const char *device,
/* See if we have a backing HD we can use to create our new image
* on top of. */
if (sync == MIRROR_SYNC_MODE_TOP) {
if (backup->sync == MIRROR_SYNC_MODE_TOP) {
source = backing_bs(bs);
if (!source) {
sync = MIRROR_SYNC_MODE_FULL;
backup->sync = MIRROR_SYNC_MODE_FULL;
}
}
if (sync == MIRROR_SYNC_MODE_NONE) {
if (backup->sync == MIRROR_SYNC_MODE_NONE) {
source = bs;
}
@@ -3269,14 +3174,14 @@ static void do_drive_backup(const char *job_id, const char *device,
goto out;
}
if (mode != NEW_IMAGE_MODE_EXISTING) {
assert(format);
if (backup->mode != NEW_IMAGE_MODE_EXISTING) {
assert(backup->format);
if (source) {
bdrv_img_create(target, format, source->filename,
bdrv_img_create(backup->target, backup->format, source->filename,
source->drv->format_name, NULL,
size, flags, &local_err, false);
} else {
bdrv_img_create(target, format, NULL, NULL, NULL,
bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
size, flags, &local_err, false);
}
}
@@ -3286,30 +3191,30 @@ static void do_drive_backup(const char *job_id, const char *device,
goto out;
}
if (format) {
if (backup->format) {
options = qdict_new();
qdict_put(options, "driver", qstring_from_str(format));
qdict_put(options, "driver", qstring_from_str(backup->format));
}
target_bs = bdrv_open(target, NULL, options, flags, errp);
target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
if (!target_bs) {
goto out;
}
bdrv_set_aio_context(target_bs, aio_context);
if (has_bitmap) {
bmap = bdrv_find_dirty_bitmap(bs, bitmap);
if (backup->has_bitmap) {
bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
if (!bmap) {
error_setg(errp, "Bitmap '%s' could not be found", bitmap);
error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
bdrv_unref(target_bs);
goto out;
}
}
backup_start(job_id, bs, target_bs, speed, sync, bmap,
on_source_error, on_target_error,
block_job_cb, bs, txn, &local_err);
backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
bmap, backup->compress, backup->on_source_error,
backup->on_target_error, block_job_cb, bs, txn, &local_err);
bdrv_unref(target_bs);
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -3320,24 +3225,9 @@ out:
aio_context_release(aio_context);
}
void qmp_drive_backup(bool has_job_id, const char *job_id,
const char *device, const char *target,
bool has_format, const char *format,
enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
bool has_bitmap, const char *bitmap,
bool has_on_source_error, BlockdevOnError on_source_error,
bool has_on_target_error, BlockdevOnError on_target_error,
Error **errp)
void qmp_drive_backup(DriveBackup *arg, Error **errp)
{
return do_drive_backup(has_job_id ? job_id : NULL, device, target,
has_format, format, sync,
has_mode, mode, has_speed, speed,
has_bitmap, bitmap,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
NULL, errp);
return do_drive_backup(arg, NULL, errp);
}
BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
@@ -3345,47 +3235,38 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
return bdrv_named_nodes_list(errp);
}
void do_blockdev_backup(const char *job_id, const char *device,
const char *target, enum MirrorSyncMode sync,
bool has_speed, int64_t speed,
bool has_on_source_error,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
BlockJobTxn *txn, Error **errp)
void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
BlockDriverState *target_bs;
Error *local_err = NULL;
AioContext *aio_context;
if (!has_speed) {
speed = 0;
if (!backup->has_speed) {
backup->speed = 0;
}
if (!has_on_source_error) {
on_source_error = BLOCKDEV_ON_ERROR_REPORT;
if (!backup->has_on_source_error) {
backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT;
}
if (!has_on_target_error) {
on_target_error = BLOCKDEV_ON_ERROR_REPORT;
if (!backup->has_on_target_error) {
backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT;
}
if (!backup->has_job_id) {
backup->job_id = NULL;
}
if (!backup->has_compress) {
backup->compress = false;
}
blk = blk_by_name(device);
if (!blk) {
error_setg(errp, "Device '%s' not found", device);
bs = qmp_get_root_bs(backup->device, errp);
if (!bs) {
return;
}
aio_context = blk_get_aio_context(blk);
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (!blk_is_available(blk)) {
error_setg(errp, "Device '%s' has no medium", device);
goto out;
}
bs = blk_bs(blk);
target_bs = bdrv_lookup_bs(target, target, errp);
target_bs = bdrv_lookup_bs(backup->target, backup->target, errp);
if (!target_bs) {
goto out;
}
@@ -3401,8 +3282,9 @@ void do_blockdev_backup(const char *job_id, const char *device,
goto out;
}
}
backup_start(job_id, bs, target_bs, speed, sync, NULL, on_source_error,
on_target_error, block_job_cb, bs, txn, &local_err);
backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
NULL, backup->compress, backup->on_source_error,
backup->on_target_error, block_job_cb, bs, txn, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
}
@@ -3410,21 +3292,9 @@ out:
aio_context_release(aio_context);
}
void qmp_blockdev_backup(bool has_job_id, const char *job_id,
const char *device, const char *target,
enum MirrorSyncMode sync,
bool has_speed, int64_t speed,
bool has_on_source_error,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
Error **errp)
void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp)
{
do_blockdev_backup(has_job_id ? job_id : NULL, device, target,
sync, has_speed, speed,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
NULL, errp);
do_blockdev_backup(arg, NULL, errp);
}
/* Parameter check and block job starting for drive mirroring.
@@ -3497,22 +3367,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
block_job_cb, bs, errp);
}
void qmp_drive_mirror(bool has_job_id, const char *job_id, const char *device,
const char *target, bool has_format, const char *format,
bool has_node_name, const char *node_name,
bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_buf_size, int64_t buf_size,
bool has_on_source_error, BlockdevOnError on_source_error,
bool has_on_target_error, BlockdevOnError on_target_error,
bool has_unmap, bool unmap,
Error **errp)
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
{
BlockDriverState *bs;
BlockBackend *blk;
BlockDriverState *source, *target_bs;
AioContext *aio_context;
BlockMirrorBackingMode backing_mode;
@@ -3520,36 +3377,31 @@ void qmp_drive_mirror(bool has_job_id, const char *job_id, const char *device,
QDict *options = NULL;
int flags;
int64_t size;
const char *format = arg->format;
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
bs = qmp_get_root_bs(arg->device, errp);
if (!bs) {
return;
}
aio_context = blk_get_aio_context(blk);
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (!blk_is_available(blk)) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
goto out;
}
bs = blk_bs(blk);
if (!has_mode) {
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
if (!arg->has_mode) {
arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
if (!has_format) {
format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
if (!arg->has_format) {
format = (arg->mode == NEW_IMAGE_MODE_EXISTING
? NULL : bs->drv->format_name);
}
flags = bs->open_flags | BDRV_O_RDWR;
source = backing_bs(bs);
if (!source && sync == MIRROR_SYNC_MODE_TOP) {
sync = MIRROR_SYNC_MODE_FULL;
if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
arg->sync = MIRROR_SYNC_MODE_FULL;
}
if (sync == MIRROR_SYNC_MODE_NONE) {
if (arg->sync == MIRROR_SYNC_MODE_NONE) {
source = bs;
}
@@ -3559,18 +3411,18 @@ void qmp_drive_mirror(bool has_job_id, const char *job_id, const char *device,
goto out;
}
if (has_replaces) {
if (arg->has_replaces) {
BlockDriverState *to_replace_bs;
AioContext *replace_aio_context;
int64_t replace_size;
if (!has_node_name) {
if (!arg->has_node_name) {
error_setg(errp, "a node-name must be provided when replacing a"
" named node of the graph");
goto out;
}
to_replace_bs = check_to_replace_node(bs, replaces, &local_err);
to_replace_bs = check_to_replace_node(bs, arg->replaces, &local_err);
if (!to_replace_bs) {
error_propagate(errp, local_err);
@@ -3589,26 +3441,26 @@ void qmp_drive_mirror(bool has_job_id, const char *job_id, const char *device,
}
}
if (mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
backing_mode = MIRROR_SOURCE_BACKING_CHAIN;
} else {
backing_mode = MIRROR_OPEN_BACKING_CHAIN;
}
if ((sync == MIRROR_SYNC_MODE_FULL || !source)
&& mode != NEW_IMAGE_MODE_EXISTING)
if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source)
&& arg->mode != NEW_IMAGE_MODE_EXISTING)
{
/* create new image w/o backing file */
assert(format);
bdrv_img_create(target, format,
bdrv_img_create(arg->target, format,
NULL, NULL, NULL, size, flags, &local_err, false);
} else {
switch (mode) {
switch (arg->mode) {
case NEW_IMAGE_MODE_EXISTING:
break;
case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
/* create new image with backing file */
bdrv_img_create(target, format,
bdrv_img_create(arg->target, format,
source->filename,
source->drv->format_name,
NULL, size, flags, &local_err, false);
@@ -3624,8 +3476,8 @@ void qmp_drive_mirror(bool has_job_id, const char *job_id, const char *device,
}
options = qdict_new();
if (has_node_name) {
qdict_put(options, "node-name", qstring_from_str(node_name));
if (arg->has_node_name) {
qdict_put(options, "node-name", qstring_from_str(arg->node_name));
}
if (format) {
qdict_put(options, "driver", qstring_from_str(format));
@@ -3634,22 +3486,22 @@ void qmp_drive_mirror(bool has_job_id, const char *job_id, const char *device,
/* Mirroring takes care of copy-on-write using the source's backing
* file.
*/
target_bs = bdrv_open(target, NULL, options, flags | BDRV_O_NO_BACKING,
errp);
target_bs = bdrv_open(arg->target, NULL, options,
flags | BDRV_O_NO_BACKING, errp);
if (!target_bs) {
goto out;
}
bdrv_set_aio_context(target_bs, aio_context);
blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
has_replaces, replaces, sync, backing_mode,
has_speed, speed,
has_granularity, granularity,
has_buf_size, buf_size,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
has_unmap, unmap,
blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
arg->has_replaces, arg->replaces, arg->sync,
backing_mode, arg->has_speed, arg->speed,
arg->has_granularity, arg->granularity,
arg->has_buf_size, arg->buf_size,
arg->has_on_source_error, arg->on_source_error,
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@@ -3671,21 +3523,13 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
Error **errp)
{
BlockDriverState *bs;
BlockBackend *blk;
BlockDriverState *target_bs;
AioContext *aio_context;
BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
Error *local_err = NULL;
blk = blk_by_name(device);
if (!blk) {
error_setg(errp, "Device '%s' not found", device);
return;
}
bs = blk_bs(blk);
bs = qmp_get_root_bs(device, errp);
if (!bs) {
error_setg(errp, "Device '%s' has no media", device);
return;
}
@@ -3826,7 +3670,6 @@ void qmp_change_backing_file(const char *device,
const char *backing_file,
Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs = NULL;
AioContext *aio_context;
BlockDriverState *image_bs = NULL;
@@ -3835,22 +3678,14 @@ void qmp_change_backing_file(const char *device,
int open_flags;
int ret;
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
bs = qmp_get_root_bs(device, errp);
if (!bs) {
return;
}
aio_context = blk_get_aio_context(blk);
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (!blk_is_available(blk)) {
error_setg(errp, "Device '%s' has no medium", device);
goto out;
}
bs = blk_bs(blk);
image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
if (local_err) {
error_propagate(errp, local_err);

View File

@@ -132,6 +132,10 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
if (job_id == NULL) {
job_id = bdrv_get_device_name(bs);
if (!*job_id) {
error_setg(errp, "An explicit job ID is required for this node");
return NULL;
}
}
if (!id_wellformed(job_id)) {

View File

@@ -17,6 +17,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu-version.h"
#include <machine/trap.h>
#include "qapi/error.h"
@@ -171,7 +172,7 @@ void cpu_loop(CPUX86State *env)
//target_siginfo_t info;
for(;;) {
trapnr = cpu_x86_exec(cs);
trapnr = cpu_exec(cs);
switch(trapnr) {
case 0x80:
/* syscall from int $0x80 */
@@ -512,7 +513,7 @@ void cpu_loop(CPUSPARCState *env)
//target_siginfo_t info;
while (1) {
trapnr = cpu_sparc_exec(cs);
trapnr = cpu_exec(cs);
switch (trapnr) {
#ifndef TARGET_SPARC64
@@ -667,7 +668,8 @@ void cpu_loop(CPUSPARCState *env)
static void usage(void)
{
printf("qemu-" TARGET_NAME " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION
", " QEMU_COPYRIGHT "\n"
"usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
"BSD CPU emulator (compiled for %s emulation)\n"
"\n"

View File

@@ -209,8 +209,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
abi_ulong new_addr);
int target_msync(abi_ulong start, abi_ulong len, int flags);
extern unsigned long last_brk;
void cpu_list_lock(void);
void cpu_list_unlock(void);
#if defined(CONFIG_USE_NPTL)
void mmap_fork_start(void);
void mmap_fork_end(int child);

73
configure vendored
View File

@@ -229,6 +229,7 @@ xfs=""
vhost_net="no"
vhost_scsi="no"
vhost_vsock="no"
kvm="no"
rdma=""
gprof="no"
@@ -674,6 +675,7 @@ Haiku)
kvm="yes"
vhost_net="yes"
vhost_scsi="yes"
vhost_vsock="yes"
QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers $QEMU_INCLUDES"
;;
esac
@@ -1017,6 +1019,10 @@ for opt do
;;
--enable-vhost-scsi) vhost_scsi="yes"
;;
--disable-vhost-vsock) vhost_vsock="no"
;;
--enable-vhost-vsock) vhost_vsock="yes"
;;
--disable-opengl) opengl="no"
;;
--enable-opengl) opengl="yes"
@@ -1452,7 +1458,7 @@ fi
gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
gcc_flags="-Wendif-labels $gcc_flags"
gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
gcc_flags="-Wno-initializer-overrides $gcc_flags"
gcc_flags="-Wno-string-plus-int $gcc_flags"
# Note that we do not add -Werror to gcc_flags here, because that would
@@ -1788,7 +1794,9 @@ fi
##########################################
# avx2 optimization requirement check
cat > $TMPC << EOF
if test "$static" = "no" ; then
cat > $TMPC << EOF
#pragma GCC push_options
#pragma GCC target("avx2")
#include <cpuid.h>
@@ -1801,12 +1809,13 @@ static void *bar_ifunc(void) {return (void*) bar;}
int foo(void *a) __attribute__((ifunc("bar_ifunc")));
int main(int argc, char *argv[]) { return foo(argv[0]);}
EOF
if compile_object "" ; then
if has readelf; then
if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then
avx2_opt="yes"
fi
fi
if compile_object "" ; then
if has readelf; then
if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then
avx2_opt="yes"
fi
fi
fi
fi
#########################################
@@ -3121,6 +3130,7 @@ else
if test "$found" = "no"; then
LIBS="$pthread_lib $LIBS"
fi
PTHREAD_LIB="$pthread_lib"
break
fi
done
@@ -4050,13 +4060,13 @@ fi
if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$vss_win32_sdk" != "no" ; then
case "$vss_win32_sdk" in
"") vss_win32_include="-I$source_path" ;;
"") vss_win32_include="-isystem $source_path" ;;
*\ *) # The SDK is installed in "Program Files" by default, but we cannot
# handle path with spaces. So we symlink the headers into ".sdk/vss".
vss_win32_include="-I$source_path/.sdk/vss"
vss_win32_include="-isystem $source_path/.sdk/vss"
symlink "$vss_win32_sdk/inc" "$source_path/.sdk/vss/inc"
;;
*) vss_win32_include="-I$vss_win32_sdk"
*) vss_win32_include="-isystem $vss_win32_sdk"
esac
cat > $TMPC << EOF
#define __MIDL_user_allocate_free_DEFINED__
@@ -4187,6 +4197,18 @@ if compile_prog "" "" ; then
posix_madvise=yes
fi
##########################################
# check if we have posix_syslog
posix_syslog=no
cat > $TMPC << EOF
#include <syslog.h>
int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; }
EOF
if compile_prog "" "" ; then
posix_syslog=yes
fi
##########################################
# check if trace backend exists
@@ -4695,7 +4717,16 @@ roms=
if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) -a \
"$targetos" != "Darwin" -a "$targetos" != "SunOS" -a \
"$softmmu" = yes ; then
roms="optionrom"
# Different host OS linkers have different ideas about the name of the ELF
# emulation. Linux and OpenBSD use 'elf_i386'; FreeBSD uses the _fbsd
# variant; and Windows uses i386pe.
for emu in elf_i386 elf_i386_fbsd i386pe; do
if "$ld" -verbose 2>&1 | grep -q "^[[:space:]]*$emu[[:space:]]*$"; then
ld_i386_emulation="$emu"
roms="optionrom"
break
fi
done
fi
if test "$cpu" = "ppc64" -a "$targetos" != "Darwin" ; then
roms="$roms spapr-rtas"
@@ -4858,6 +4889,7 @@ echo "uuid support $uuid"
echo "libcap-ng support $cap_ng"
echo "vhost-net support $vhost_net"
echo "vhost-scsi support $vhost_scsi"
echo "vhost-vsock support $vhost_vsock"
echo "Trace backends $trace_backends"
if have_backend "simple"; then
echo "Trace output file $trace_file-<pid>"
@@ -5239,6 +5271,9 @@ fi
if test "$vhost_net" = "yes" ; then
echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak
fi
if test "$vhost_vsock" = "yes" ; then
echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak
fi
if test "$blobs" = "yes" ; then
echo "INSTALL_BLOBS=yes" >> $config_host_mak
fi
@@ -5455,6 +5490,13 @@ if have_backend "ftrace"; then
feature_not_found "ftrace(trace backend)" "ftrace requires Linux"
fi
fi
if have_backend "syslog"; then
if test "$posix_syslog" = "yes" ; then
echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak
else
feature_not_found "syslog(trace backend)" "syslog not available"
fi
fi
echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak
if test "$rdma" = "yes" ; then
@@ -5535,8 +5577,10 @@ fi
echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
echo "LDFLAGS_NOPIE=$LDFLAGS_NOPIE" >> $config_host_mak
echo "LD_REL_FLAGS=$LD_REL_FLAGS" >> $config_host_mak
echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak
echo "LIBS+=$LIBS" >> $config_host_mak
echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
echo "PTHREAD_LIB=$PTHREAD_LIB" >> $config_host_mak
echo "EXESUF=$EXESUF" >> $config_host_mak
echo "DSOSUF=$DSOSUF" >> $config_host_mak
echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
@@ -5993,6 +6037,11 @@ for rom in seabios vgabios ; do
echo "LD=$ld" >> $config_mak
done
# set up tests data directory
if [ ! -e tests/data ]; then
symlink "$source_path/tests/data" tests/data
fi
# set up qemu-iotests in this build directory
iotests_common_env="tests/qemu-iotests/common.env"
iotests_check="tests/qemu-iotests/check"

View File

@@ -2,6 +2,7 @@ crypto-obj-y = init.o
crypto-obj-y += hash.o
crypto-obj-$(CONFIG_NETTLE) += hash-nettle.o
crypto-obj-$(if $(CONFIG_NETTLE),n,$(CONFIG_GCRYPT)) += hash-gcrypt.o
crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT),n,y)) += hash-glib.o
crypto-obj-y += aes.o
crypto-obj-y += desrfb.o
crypto-obj-y += cipher.o
@@ -12,6 +13,7 @@ crypto-obj-y += tlssession.o
crypto-obj-y += secret.o
crypto-obj-$(CONFIG_GCRYPT) += random-gcrypt.o
crypto-obj-$(if $(CONFIG_GCRYPT),n,$(CONFIG_GNUTLS_RND)) += random-gnutls.o
crypto-obj-$(if $(CONFIG_GCRYPT),n,$(if $(CONFIG_GNUTLS_RND),n,y)) += random-platform.o
crypto-obj-y += pbkdf.o
crypto-obj-$(CONFIG_NETTLE_KDF) += pbkdf-nettle.o
crypto-obj-$(if $(CONFIG_NETTLE_KDF),n,$(CONFIG_GCRYPT_KDF)) += pbkdf-gcrypt.o
@@ -28,6 +30,4 @@ crypto-obj-y += block-luks.o
# Let the userspace emulators avoid linking gnutls/etc
crypto-aes-obj-y = aes.o
stub-obj-y += random-stub.o
stub-obj-y += pbkdf-stub.o
stub-obj-y += hash-stub.o

View File

@@ -201,6 +201,15 @@ QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSHeader) != 592);
struct QCryptoBlockLUKS {
QCryptoBlockLUKSHeader header;
/* Cache parsed versions of what's in header fields,
* as we can't rely on QCryptoBlock.cipher being
* non-NULL */
QCryptoCipherAlgorithm cipher_alg;
QCryptoCipherMode cipher_mode;
QCryptoIVGenAlgorithm ivgen_alg;
QCryptoHashAlgorithm ivgen_hash_alg;
QCryptoHashAlgorithm hash_alg;
};
@@ -847,6 +856,12 @@ qcrypto_block_luks_open(QCryptoBlock *block,
block->payload_offset = luks->header.payload_offset *
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
luks->cipher_alg = cipheralg;
luks->cipher_mode = ciphermode;
luks->ivgen_alg = ivalg;
luks->ivgen_hash_alg = ivhash;
luks->hash_alg = hash;
g_free(masterkey);
g_free(password);
@@ -1271,6 +1286,12 @@ qcrypto_block_luks_create(QCryptoBlock *block,
goto error;
}
luks->cipher_alg = luks_opts.cipher_alg;
luks->cipher_mode = luks_opts.cipher_mode;
luks->ivgen_alg = luks_opts.ivgen_alg;
luks->ivgen_hash_alg = luks_opts.ivgen_hash_alg;
luks->hash_alg = luks_opts.hash_alg;
memset(masterkey, 0, luks->header.key_bytes);
g_free(masterkey);
memset(slotkey, 0, luks->header.key_bytes);
@@ -1305,6 +1326,51 @@ qcrypto_block_luks_create(QCryptoBlock *block,
}
static int qcrypto_block_luks_get_info(QCryptoBlock *block,
QCryptoBlockInfo *info,
Error **errp)
{
QCryptoBlockLUKS *luks = block->opaque;
QCryptoBlockInfoLUKSSlot *slot;
QCryptoBlockInfoLUKSSlotList *slots = NULL, **prev = &info->u.luks.slots;
size_t i;
info->u.luks.cipher_alg = luks->cipher_alg;
info->u.luks.cipher_mode = luks->cipher_mode;
info->u.luks.ivgen_alg = luks->ivgen_alg;
if (info->u.luks.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
info->u.luks.has_ivgen_hash_alg = true;
info->u.luks.ivgen_hash_alg = luks->ivgen_hash_alg;
}
info->u.luks.hash_alg = luks->hash_alg;
info->u.luks.payload_offset = block->payload_offset;
info->u.luks.master_key_iters = luks->header.master_key_iterations;
info->u.luks.uuid = g_strndup((const char *)luks->header.uuid,
sizeof(luks->header.uuid));
for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
slots = g_new0(QCryptoBlockInfoLUKSSlotList, 1);
*prev = slots;
slots->value = slot = g_new0(QCryptoBlockInfoLUKSSlot, 1);
slot->active = luks->header.key_slots[i].active ==
QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED;
slot->key_offset = luks->header.key_slots[i].key_offset
* QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
if (slot->active) {
slot->has_iters = true;
slot->iters = luks->header.key_slots[i].iterations;
slot->has_stripes = true;
slot->stripes = luks->header.key_slots[i].stripes;
}
prev = &slots->next;
}
return 0;
}
static void qcrypto_block_luks_cleanup(QCryptoBlock *block)
{
g_free(block->opaque);
@@ -1342,6 +1408,7 @@ qcrypto_block_luks_encrypt(QCryptoBlock *block,
const QCryptoBlockDriver qcrypto_block_driver_luks = {
.open = qcrypto_block_luks_open,
.create = qcrypto_block_luks_create,
.get_info = qcrypto_block_luks_get_info,
.cleanup = qcrypto_block_luks_cleanup,
.decrypt = qcrypto_block_luks_decrypt,
.encrypt = qcrypto_block_luks_encrypt,

View File

@@ -59,7 +59,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
!qcrypto_block_drivers[options->format]) {
error_setg(errp, "Unsupported block driver %d", options->format);
error_setg(errp, "Unsupported block driver %s",
QCryptoBlockFormat_lookup[options->format]);
g_free(block);
return NULL;
}
@@ -88,7 +89,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
!qcrypto_block_drivers[options->format]) {
error_setg(errp, "Unsupported block driver %d", options->format);
error_setg(errp, "Unsupported block driver %s",
QCryptoBlockFormat_lookup[options->format]);
g_free(block);
return NULL;
}
@@ -105,6 +107,23 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
}
QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
Error **errp)
{
QCryptoBlockInfo *info = g_new0(QCryptoBlockInfo, 1);
info->format = block->format;
if (block->driver->get_info &&
block->driver->get_info(block, info, errp) < 0) {
g_free(info);
return NULL;
}
return info;
}
int qcrypto_block_decrypt(QCryptoBlock *block,
uint64_t startsector,
uint8_t *buf,

View File

@@ -53,6 +53,10 @@ struct QCryptoBlockDriver {
void *opaque,
Error **errp);
int (*get_info)(QCryptoBlock *block,
QCryptoBlockInfo *info,
Error **errp);
void (*cleanup)(QCryptoBlock *block);
int (*encrypt)(QCryptoBlock *block,

View File

@@ -244,7 +244,8 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
if (cipher->mode != QCRYPTO_CIPHER_MODE_CBC &&
cipher->mode != QCRYPTO_CIPHER_MODE_ECB &&
cipher->mode != QCRYPTO_CIPHER_MODE_XTS) {
error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
@@ -376,7 +377,8 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
QCryptoCipherBuiltin *ctxt;
if (cipher->mode != QCRYPTO_CIPHER_MODE_ECB) {
error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
@@ -442,7 +444,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp,
"Unsupported cipher algorithm %d", cipher->alg);
"Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_lookup[cipher->alg]);
goto error;
}

View File

@@ -70,7 +70,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
gcrymode = GCRY_CIPHER_MODE_CBC;
break;
default:
error_setg(errp, "Unsupported cipher mode %d", mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -120,7 +121,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d", alg);
error_setg(errp, "Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_lookup[alg]);
return NULL;
}
@@ -192,6 +194,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
}
if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
if (ctx->blocksize != XTS_BLOCK_SIZE) {
error_setg(errp,
"Cipher block size %zu must equal XTS block size %d",
ctx->blocksize, XTS_BLOCK_SIZE);
goto error;
}
ctx->iv = g_new0(uint8_t, ctx->blocksize);
}

View File

@@ -227,7 +227,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
case QCRYPTO_CIPHER_MODE_XTS:
break;
default:
error_setg(errp, "Unsupported cipher mode %d", mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -357,7 +358,15 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d", alg);
error_setg(errp, "Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_lookup[alg]);
goto error;
}
if (mode == QCRYPTO_CIPHER_MODE_XTS &&
ctx->blocksize != XTS_BLOCK_SIZE) {
error_setg(errp, "Cipher block size %zu must equal XTS block size %d",
ctx->blocksize, XTS_BLOCK_SIZE);
goto error;
}
@@ -422,8 +431,8 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d",
cipher->alg);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
return 0;
@@ -456,19 +465,14 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
break;
case QCRYPTO_CIPHER_MODE_XTS:
if (ctx->blocksize != XTS_BLOCK_SIZE) {
error_setg(errp, "Block size must be %d not %zu",
XTS_BLOCK_SIZE, ctx->blocksize);
return -1;
}
xts_decrypt(ctx->ctx, ctx->ctx_tweak,
ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper,
ctx->iv, len, out, in);
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d",
cipher->alg);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
return 0;

View File

@@ -55,8 +55,7 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
gcry_md_hd_t md;
unsigned char *digest;
if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) ||
qcrypto_hash_alg_map[alg] == GCRY_MD_NONE) {
if (!qcrypto_hash_supports(alg)) {
error_setg(errp,
"Unknown hash algorithm %d",
alg);

97
crypto/hash-glib.c Normal file
View File

@@ -0,0 +1,97 @@
/*
* QEMU Crypto hash algorithms
*
* Copyright (c) 2016 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "crypto/hash.h"
static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
[QCRYPTO_HASH_ALG_MD5] = G_CHECKSUM_MD5,
[QCRYPTO_HASH_ALG_SHA1] = G_CHECKSUM_SHA1,
[QCRYPTO_HASH_ALG_SHA224] = -1,
[QCRYPTO_HASH_ALG_SHA256] = G_CHECKSUM_SHA256,
[QCRYPTO_HASH_ALG_SHA384] = -1,
#if GLIB_CHECK_VERSION(2, 36, 0)
[QCRYPTO_HASH_ALG_SHA512] = G_CHECKSUM_SHA512,
#else
[QCRYPTO_HASH_ALG_SHA512] = -1,
#endif
[QCRYPTO_HASH_ALG_RIPEMD160] = -1,
};
gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
{
if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
qcrypto_hash_alg_map[alg] != -1) {
return true;
}
return false;
}
int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
const struct iovec *iov,
size_t niov,
uint8_t **result,
size_t *resultlen,
Error **errp)
{
int i, ret;
GChecksum *cs;
if (!qcrypto_hash_supports(alg)) {
error_setg(errp,
"Unknown hash algorithm %d",
alg);
return -1;
}
cs = g_checksum_new(qcrypto_hash_alg_map[alg]);
for (i = 0; i < niov; i++) {
g_checksum_update(cs, iov[i].iov_base, iov[i].iov_len);
}
ret = g_checksum_type_get_length(qcrypto_hash_alg_map[alg]);
if (ret < 0) {
error_setg(errp, "%s",
"Unable to get hash length");
goto error;
}
if (*resultlen == 0) {
*resultlen = ret;
*result = g_new0(uint8_t, *resultlen);
} else if (*resultlen != ret) {
error_setg(errp,
"Result buffer size %zu is smaller than hash %d",
*resultlen, ret);
goto error;
}
g_checksum_get_digest(cs, *result, resultlen);
g_checksum_free(cs);
return 0;
error:
g_checksum_free(cs);
return -1;
}

View File

@@ -113,8 +113,7 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
int i;
union qcrypto_hash_ctx ctx;
if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) ||
qcrypto_hash_alg_map[alg].init == NULL) {
if (!qcrypto_hash_supports(alg)) {
error_setg(errp,
"Unknown hash algorithm %d",
alg);

View File

@@ -1,41 +0,0 @@
/*
* QEMU Crypto hash algorithms
*
* Copyright (c) 2016 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "crypto/hash.h"
gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg G_GNUC_UNUSED)
{
return false;
}
int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
const struct iovec *iov G_GNUC_UNUSED,
size_t niov G_GNUC_UNUSED,
uint8_t **result G_GNUC_UNUSED,
size_t *resultlen G_GNUC_UNUSED,
Error **errp)
{
error_setg(errp,
"Hash algorithm %d not supported without GNUTLS",
alg);
return -1;
}

View File

@@ -59,8 +59,7 @@
#if (defined(CONFIG_GCRYPT) && \
(!defined(CONFIG_GNUTLS) || \
!defined(GNUTLS_VERSION_NUMBER) || \
(GNUTLS_VERSION_NUMBER < 0x020c00)) && \
(LIBGNUTLS_VERSION_NUMBER < 0x020c00)) && \
(!defined(GCRYPT_VERSION_NUMBER) || \
(GCRYPT_VERSION_NUMBER < 0x010600)))
#define QCRYPTO_INIT_GCRYPT_THREADS

View File

@@ -26,6 +26,39 @@ int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED,
size_t buflen G_GNUC_UNUSED,
Error **errp)
{
error_setg(errp, "No random byte source provided in this build");
return -1;
int fd;
int ret = -1;
int got;
/* TBD perhaps also add support for BSD getentropy / Linux
* getrandom syscalls directly */
fd = open("/dev/urandom", O_RDONLY);
if (fd == -1 && errno == ENOENT) {
fd = open("/dev/random", O_RDONLY);
}
if (fd < 0) {
error_setg(errp, "No /dev/urandom or /dev/random found");
return -1;
}
while (buflen > 0) {
got = read(fd, buf, buflen);
if (got < 0) {
error_setg_errno(errp, errno,
"Unable to read random bytes");
goto cleanup;
} else if (!got) {
error_setg(errp,
"Unexpected EOF reading random bytes");
goto cleanup;
}
buflen -= got;
buf += got;
}
ret = 0;
cleanup:
close(fd);
return ret;
}

View File

@@ -615,7 +615,7 @@ qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds,
}
if (cert != NULL && key != NULL) {
#if GNUTLS_VERSION_NUMBER >= 0x030111
#if LIBGNUTLS_VERSION_NUMBER >= 0x030111
char *password = NULL;
if (creds->passwordid) {
password = qcrypto_secret_lookup_as_utf8(creds->passwordid,
@@ -630,7 +630,7 @@ qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds,
password,
0);
g_free(password);
#else /* GNUTLS_VERSION_NUMBER < 0x030111 */
#else /* LIBGNUTLS_VERSION_NUMBER < 0x030111 */
if (creds->passwordid) {
error_setg(errp, "PKCS8 decryption requires GNUTLS >= 3.1.11");
goto cleanup;
@@ -638,7 +638,7 @@ qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds,
ret = gnutls_certificate_set_x509_key_file(creds->data,
cert, key,
GNUTLS_X509_FMT_PEM);
#endif /* GNUTLS_VERSION_NUMBER < 0x030111 */
#endif
if (ret < 0) {
error_setg(errp, "Cannot load certificate '%s' & key '%s': %s",
cert, key, gnutls_strerror(ret));

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# crypto/tlscreds.c
qcrypto_tls_creds_load_dh(void *creds, const char *filename) "TLS creds load DH creds=%p filename=%s"

View File

@@ -410,7 +410,7 @@ following example objects:
=== Commands ===
Usage: { 'command': STRING, '*data': COMPLEX-TYPE-NAME-OR-DICT,
'*returns': TYPE-NAME,
'*returns': TYPE-NAME, '*boxed': true,
'*gen': false, '*success-response': false }
Commands are defined by using a dictionary containing several members,
@@ -461,6 +461,20 @@ which would validate this Client JSON Protocol transaction:
=> { "execute": "my-second-command" }
<= { "return": [ { "value": "one" }, { } ] }
The generator emits a prototype for the user's function implementing
the command. Normally, 'data' is a dictionary for an anonymous type,
or names a struct type (possibly empty, but not a union), and its
members are passed as separate arguments to this function. If the
command definition includes a key 'boxed' with the boolean value true,
then 'data' is instead the name of any non-empty complex type
(struct, union, or alternate), and a pointer to that QAPI type is
passed as a single argument.
The generator also emits a marshalling function that extracts
arguments for the user's function out of an input QDict, calls the
user's function, and if it succeeded, builds an output QObject from
its return value.
In rare cases, QAPI cannot express a type-safe representation of a
corresponding Client JSON Protocol command. You then have to suppress
generation of a marshalling function by including a key 'gen' with
@@ -484,7 +498,8 @@ use of this member.
=== Events ===
Usage: { 'event': STRING, '*data': COMPLEX-TYPE-NAME-OR-DICT }
Usage: { 'event': STRING, '*data': COMPLEX-TYPE-NAME-OR-DICT,
'*boxed': true }
Events are defined with the keyword 'event'. It is not allowed to
name an event 'MAX', since the generator also produces a C enumeration
@@ -505,6 +520,14 @@ Resulting in this JSON object:
"data": { "b": "test string" },
"timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
The generator emits a function to send the event. Normally, 'data' is
a dictionary for an anonymous type, or names a struct type (possibly
empty, but not a union), and its members are passed as separate
arguments to this function. If the event definition includes a key
'boxed' with the boolean value true, then 'data' is instead the name of
any non-empty complex type (struct, union, or alternate), and a
pointer to that QAPI type is passed as a single argument.
== Client JSON Protocol introspection ==

View File

@@ -37,6 +37,8 @@ consists of 3 header fields and a payload:
* Flags: 32-bit bit field:
- Lower 2 bits are the version (currently 0x01)
- Bit 2 is the reply flag - needs to be sent on each reply from the slave
- Bit 3 is the need_reply flag - see VHOST_USER_PROTOCOL_F_REPLY_ACK for
details.
* Size - 32-bit size of the payload
@@ -126,6 +128,8 @@ the ones that do:
* VHOST_GET_VRING_BASE
* VHOST_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD)
[ Also see the section on REPLY_ACK protocol extension. ]
There are several messages that the master sends with file descriptors passed
in the ancillary data:
@@ -254,6 +258,7 @@ Protocol features
#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_USER_PROTOCOL_F_RARP 2
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
Message types
-------------
@@ -464,3 +469,24 @@ Message types
is present in VHOST_USER_GET_PROTOCOL_FEATURES.
The first 6 bytes of the payload contain the mac address of the guest to
allow the vhost user backend to construct and broadcast the fake RARP.
VHOST_USER_PROTOCOL_F_REPLY_ACK:
-------------------------------
The original vhost-user specification only demands replies for certain
commands. This differs from the vhost protocol implementation where commands
are sent over an ioctl() call and block until the client has completed.
With this protocol extension negotiated, the sender (QEMU) can set the
"need_reply" [Bit 3] flag to any command. This indicates that
the client MUST respond with a Payload VhostUserMsg indicating success or
failure. The payload should be set to zero on success or non-zero on failure,
unless the message already has an explicit reply body.
The response payload gives QEMU a deterministic indication of the result
of the command. Today, QEMU is expected to terminate the main vhost-user
loop upon receiving such errors. In future, qemu could be taught to be more
resilient for selective requests.
For the message types that already solicit a reply from the client, the
presence of VHOST_USER_PROTOCOL_F_REPLY_ACK or need_reply bit being set brings
no behavioural change. (See the 'Communication' section for details.)

View File

@@ -192,6 +192,18 @@ After running qemu by root user, you can get the trace:
Restriction: "ftrace" backend is restricted to Linux only.
=== Syslog ===
The "syslog" backend sends trace events using the POSIX syslog API. The log
is opened specifying the LOG_DAEMON facility and LOG_PID option (so events
are tagged with the pid of the particular QEMU process that generated
them). All events are logged at LOG_INFO level.
NOTE: syslog may squash duplicate consecutive trace events and apply rate
limiting.
Restriction: "syslog" backend is restricted to POSIX compliant OS.
==== Monitor commands ====
* trace-file on|off|flush|set <path>

View File

@@ -28,7 +28,8 @@ virtio core virtio transport virtio device
----------- ---------------- -------------
save() function registered
via register_savevm()
via VMState wrapper on
device class
virtio_save() <----------
------> save_config()
- save proxy device
@@ -63,7 +64,8 @@ virtio core virtio transport virtio device
----------- ---------------- -------------
load() function registered
via register_savevm()
via VMState wrapper on
device class
virtio_load() <----------
------> load_config()
- load proxy device

81
exec.c
View File

@@ -598,67 +598,37 @@ AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
}
#endif
#ifndef CONFIG_USER_ONLY
static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
static bool cpu_index_auto_assigned;
static int cpu_get_free_index(Error **errp)
{
int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
if (cpu >= MAX_CPUMASK_BITS) {
error_setg(errp, "Trying to use more CPUs than max of %d",
MAX_CPUMASK_BITS);
return -1;
}
bitmap_set(cpu_index_map, cpu, 1);
return cpu;
}
static void cpu_release_index(CPUState *cpu)
{
bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
}
#else
static int cpu_get_free_index(Error **errp)
static int cpu_get_free_index(void)
{
CPUState *some_cpu;
int cpu_index = 0;
cpu_index_auto_assigned = true;
CPU_FOREACH(some_cpu) {
cpu_index++;
}
return cpu_index;
}
static void cpu_release_index(CPUState *cpu)
{
return;
}
#endif
void cpu_exec_exit(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
#if defined(CONFIG_USER_ONLY)
cpu_list_lock();
#endif
if (cpu->cpu_index == -1) {
/* cpu_index was never allocated by this @cpu or was already freed. */
#if defined(CONFIG_USER_ONLY)
if (cpu->node.tqe_prev == NULL) {
/* there is nothing to undo since cpu_exec_init() hasn't been called */
cpu_list_unlock();
#endif
return;
}
assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ)));
QTAILQ_REMOVE(&cpus, cpu, node);
cpu_release_index(cpu);
cpu->cpu_index = -1;
#if defined(CONFIG_USER_ONLY)
cpu->node.tqe_prev = NULL;
cpu->cpu_index = UNASSIGNED_CPU_INDEX;
cpu_list_unlock();
#endif
if (cc->vmsd != NULL) {
vmstate_unregister(NULL, cc->vmsd, cpu);
@@ -670,8 +640,8 @@ void cpu_exec_exit(CPUState *cpu)
void cpu_exec_init(CPUState *cpu, Error **errp)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
Error *local_err = NULL;
CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
Error *local_err ATTRIBUTE_UNUSED = NULL;
cpu->as = NULL;
cpu->num_ases = 0;
@@ -694,22 +664,17 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
object_ref(OBJECT(cpu->memory));
#endif
#if defined(CONFIG_USER_ONLY)
cpu_list_lock();
#endif
cpu->cpu_index = cpu_get_free_index(&local_err);
if (local_err) {
error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
cpu_list_unlock();
#endif
return;
if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
cpu->cpu_index = cpu_get_free_index();
assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
} else {
assert(!cpu_index_auto_assigned);
}
QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
(void) cc;
cpu_list_unlock();
#else
#ifndef CONFIG_USER_ONLY
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
}
@@ -1268,7 +1233,7 @@ static void *file_ram_alloc(RAMBlock *block,
char *filename;
char *sanitized_name;
char *c;
void *area;
void *area = MAP_FAILED;
int fd = -1;
int64_t page_size;
@@ -1356,13 +1321,19 @@ static void *file_ram_alloc(RAMBlock *block,
}
if (mem_prealloc) {
os_mem_prealloc(fd, area, memory);
os_mem_prealloc(fd, area, memory, errp);
if (errp && *errp) {
goto error;
}
}
block->fd = fd;
return area;
error:
if (area != MAP_FAILED) {
qemu_ram_munmap(area, memory);
}
if (unlink_on_error) {
unlink(path);
}

View File

@@ -197,7 +197,7 @@ float128 float128_default_nan(float_status *status)
| should be simply `float_exception_flags |= flags;'.
*----------------------------------------------------------------------------*/
void float_raise(int8_t flags, float_status *status)
void float_raise(uint8_t flags, float_status *status)
{
status->float_exception_flags |= flags;
}

View File

@@ -1182,8 +1182,8 @@ ETEXI
{
.name = "drive_backup",
.args_type = "reuse:-n,full:-f,device:B,target:s,format:s?",
.params = "[-n] [-f] device target [format]",
.args_type = "reuse:-n,full:-f,compress:-c,device:B,target:s,format:s?",
.params = "[-n] [-f] [-c] device target [format]",
.help = "initiates a point-in-time\n\t\t\t"
"copy for a device. The device's contents are\n\t\t\t"
"copied to the new image file, excluding data that\n\t\t\t"
@@ -1191,7 +1191,9 @@ ETEXI
"The -n flag requests QEMU to reuse the image found\n\t\t\t"
"in new-image-file, instead of recreating it from scratch.\n\t\t\t"
"The -f flag requests QEMU to copy the whole disk,\n\t\t\t"
"so that the result does not need a backing file.\n\t\t\t",
"so that the result does not need a backing file.\n\t\t\t"
"The -c flag requests QEMU to compress backup data\n\t\t\t"
"(if the target format supports it).\n\t\t\t",
.mhandler.cmd = hmp_drive_backup,
},
STEXI

107
hmp.c
View File

@@ -1077,31 +1077,28 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict)
void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
{
const char *device = qdict_get_str(qdict, "device");
const char *filename = qdict_get_str(qdict, "target");
const char *format = qdict_get_try_str(qdict, "format");
bool reuse = qdict_get_try_bool(qdict, "reuse", false);
bool full = qdict_get_try_bool(qdict, "full", false);
enum NewImageMode mode;
Error *err = NULL;
DriveMirror mirror = {
.device = (char *)qdict_get_str(qdict, "device"),
.target = (char *)filename,
.has_format = !!format,
.format = (char *)format,
.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
.has_mode = true,
.mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS,
.unmap = true,
};
if (!filename) {
error_setg(&err, QERR_MISSING_PARAMETER, "target");
hmp_handle_error(mon, &err);
return;
}
if (reuse) {
mode = NEW_IMAGE_MODE_EXISTING;
} else {
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
qmp_drive_mirror(false, NULL, device, filename, !!format, format,
false, NULL, false, NULL,
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
true, mode, false, 0, false, 0, false, 0,
false, 0, false, 0, false, true, &err);
qmp_drive_mirror(&mirror, &err);
hmp_handle_error(mon, &err);
}
@@ -1112,8 +1109,19 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
const char *format = qdict_get_try_str(qdict, "format");
bool reuse = qdict_get_try_bool(qdict, "reuse", false);
bool full = qdict_get_try_bool(qdict, "full", false);
enum NewImageMode mode;
bool compress = qdict_get_try_bool(qdict, "compress", false);
Error *err = NULL;
DriveBackup backup = {
.device = (char *)device,
.target = (char *)filename,
.has_format = !!format,
.format = (char *)format,
.sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
.has_mode = true,
.mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS,
.has_compress = !!compress,
.compress = compress,
};
if (!filename) {
error_setg(&err, QERR_MISSING_PARAMETER, "target");
@@ -1121,16 +1129,7 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
return;
}
if (reuse) {
mode = NEW_IMAGE_MODE_EXISTING;
} else {
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
qmp_drive_backup(false, NULL, device, filename, !!format, format,
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
true, mode, false, 0, false, NULL,
false, 0, false, 0, &err);
qmp_drive_backup(&backup, &err);
hmp_handle_error(mon, &err);
}
@@ -1439,42 +1438,17 @@ void hmp_change(Monitor *mon, const QDict *qdict)
void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
BlockIOThrottle throttle = {
.device = (char *) qdict_get_str(qdict, "device"),
.bps = qdict_get_int(qdict, "bps"),
.bps_rd = qdict_get_int(qdict, "bps_rd"),
.bps_wr = qdict_get_int(qdict, "bps_wr"),
.iops = qdict_get_int(qdict, "iops"),
.iops_rd = qdict_get_int(qdict, "iops_rd"),
.iops_wr = qdict_get_int(qdict, "iops_wr"),
};
qmp_block_set_io_throttle(qdict_get_str(qdict, "device"),
qdict_get_int(qdict, "bps"),
qdict_get_int(qdict, "bps_rd"),
qdict_get_int(qdict, "bps_wr"),
qdict_get_int(qdict, "iops"),
qdict_get_int(qdict, "iops_rd"),
qdict_get_int(qdict, "iops_wr"),
false, /* no burst max via HMP */
0,
false,
0,
false,
0,
false,
0,
false,
0,
false,
0,
false, /* no burst length via HMP */
0,
false,
0,
false,
0,
false,
0,
false,
0,
false,
0,
false, /* No default I/O size */
0,
false,
NULL, &err);
qmp_block_set_io_throttle(&throttle, &err);
hmp_handle_error(mon, &err);
}
@@ -1949,11 +1923,22 @@ void hmp_chardev_remove(Monitor *mon, const QDict *qdict)
void hmp_qemu_io(Monitor *mon, const QDict *qdict)
{
BlockBackend *blk;
BlockBackend *local_blk = NULL;
const char* device = qdict_get_str(qdict, "device");
const char* command = qdict_get_str(qdict, "command");
Error *err = NULL;
blk = blk_by_name(device);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
if (bs) {
blk = local_blk = blk_new();
blk_insert_bs(blk, bs);
} else {
goto fail;
}
}
if (blk) {
AioContext *aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
@@ -1966,6 +1951,8 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
"Device '%s' not found", device);
}
fail:
blk_unref(local_blk);
hmp_handle_error(mon, &err);
}

View File

@@ -1010,6 +1010,7 @@ static void v9fs_attach(void *opaque)
goto out;
}
err += offset;
memcpy(&s->root_qid, &qid, sizeof(qid));
trace_v9fs_attach_return(pdu->tag, pdu->id,
qid.type, qid.version, qid.path);
/*
@@ -1256,6 +1257,19 @@ static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
return offset;
}
static bool name_is_illegal(const char *name)
{
return !*name || strchr(name, '/') != NULL;
}
static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
{
return
qid1->type != qid2->type ||
qid1->version != qid2->version ||
qid1->path != qid2->path;
}
static void v9fs_walk(void *opaque)
{
int name_idx;
@@ -1271,6 +1285,7 @@ static void v9fs_walk(void *opaque)
V9fsFidState *newfidp = NULL;
V9fsPDU *pdu = opaque;
V9fsState *s = pdu->s;
V9fsQID qid;
err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
if (err < 0) {
@@ -1289,6 +1304,10 @@ static void v9fs_walk(void *opaque)
if (err < 0) {
goto out_nofid;
}
if (name_is_illegal(wnames[i].data)) {
err = -ENOENT;
goto out_nofid;
}
offset += err;
}
} else if (nwnames > P9_MAXWELEM) {
@@ -1300,6 +1319,12 @@ static void v9fs_walk(void *opaque)
err = -ENOENT;
goto out_nofid;
}
err = fid_to_qid(pdu, fidp, &qid);
if (err < 0) {
goto out;
}
v9fs_path_init(&dpath);
v9fs_path_init(&path);
/*
@@ -1309,16 +1334,22 @@ static void v9fs_walk(void *opaque)
v9fs_path_copy(&dpath, &fidp->path);
v9fs_path_copy(&path, &fidp->path);
for (name_idx = 0; name_idx < nwnames; name_idx++) {
err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, &path);
if (err < 0) {
goto out;
if (not_same_qid(&pdu->s->root_qid, &qid) ||
strcmp("..", wnames[name_idx].data)) {
err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
&path);
if (err < 0) {
goto out;
}
err = v9fs_co_lstat(pdu, &path, &stbuf);
if (err < 0) {
goto out;
}
stat_to_qid(&stbuf, &qid);
v9fs_path_copy(&dpath, &path);
}
err = v9fs_co_lstat(pdu, &path, &stbuf);
if (err < 0) {
goto out;
}
stat_to_qid(&stbuf, &qids[name_idx]);
v9fs_path_copy(&dpath, &path);
memcpy(&qids[name_idx], &qid, sizeof(qid));
}
if (fid == newfid) {
BUG_ON(fidp->fid_type != P9_FID_NONE);
@@ -1483,6 +1514,16 @@ static void v9fs_lcreate(void *opaque)
}
trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
err = -EEXIST;
goto out_nofid;
}
fidp = get_fid(pdu, dfid);
if (fidp == NULL) {
err = -ENOENT;
@@ -2077,6 +2118,16 @@ static void v9fs_create(void *opaque)
}
trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
err = -EEXIST;
goto out_nofid;
}
fidp = get_fid(pdu, fid);
if (fidp == NULL) {
err = -EINVAL;
@@ -2242,6 +2293,16 @@ static void v9fs_symlink(void *opaque)
}
trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
err = -EEXIST;
goto out_nofid;
}
dfidp = get_fid(pdu, dfid);
if (dfidp == NULL) {
err = -EINVAL;
@@ -2316,6 +2377,16 @@ static void v9fs_link(void *opaque)
}
trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
err = -EEXIST;
goto out_nofid;
}
dfidp = get_fid(pdu, dfid);
if (dfidp == NULL) {
err = -ENOENT;
@@ -2398,6 +2469,22 @@ static void v9fs_unlinkat(void *opaque)
if (err < 0) {
goto out_nofid;
}
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data)) {
err = -EINVAL;
goto out_nofid;
}
if (!strcmp("..", name.data)) {
err = -ENOTEMPTY;
goto out_nofid;
}
dfidp = get_fid(pdu, dfid);
if (dfidp == NULL) {
err = -EINVAL;
@@ -2504,6 +2591,17 @@ static void v9fs_rename(void *opaque)
if (err < 0) {
goto out_nofid;
}
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
err = -EISDIR;
goto out_nofid;
}
fidp = get_fid(pdu, fid);
if (fidp == NULL) {
err = -ENOENT;
@@ -2616,6 +2714,17 @@ static void v9fs_renameat(void *opaque)
goto out_err;
}
if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
err = -ENOENT;
goto out_err;
}
if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
!strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
err = -EISDIR;
goto out_err;
}
v9fs_path_write_lock(s);
err = v9fs_complete_renameat(pdu, olddirfid,
&old_name, newdirfid, &new_name);
@@ -2826,6 +2935,16 @@ static void v9fs_mknod(void *opaque)
}
trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
err = -EEXIST;
goto out_nofid;
}
fidp = get_fid(pdu, fid);
if (fidp == NULL) {
err = -ENOENT;
@@ -2977,6 +3096,16 @@ static void v9fs_mkdir(void *opaque)
}
trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
if (name_is_illegal(name.data)) {
err = -ENOENT;
goto out_nofid;
}
if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
err = -EEXIST;
goto out_nofid;
}
fidp = get_fid(pdu, fid);
if (fidp == NULL) {
err = -ENOENT;

View File

@@ -236,6 +236,7 @@ typedef struct V9fsState
int32_t root_fid;
Error *migration_blocker;
V9fsConf fsconf;
V9fsQID root_qid;
} V9fsState;
/* 9p2000.L open flags */

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/9pfs/virtio-9p.c
v9fs_rerror(uint16_t tag, uint8_t id, int err) "tag %d id %d err %d"

View File

@@ -97,14 +97,9 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config)
g_free(cfg);
}
static void virtio_9p_save(QEMUFile *f, void *opaque)
static int virtio_9p_load(QEMUFile *f, void *opaque, size_t size)
{
virtio_save(VIRTIO_DEVICE(opaque), f);
}
static int virtio_9p_load(QEMUFile *f, void *opaque, int version_id)
{
return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
return virtio_load(VIRTIO_DEVICE(opaque), f, 1);
}
static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
@@ -120,7 +115,6 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag);
virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size);
v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output);
register_savevm(dev, "virtio-9p", -1, 1, virtio_9p_save, virtio_9p_load, v);
out:
return;
@@ -133,7 +127,6 @@ static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp)
V9fsState *s = &v->state;
virtio_cleanup(vdev);
unregister_savevm(dev, "virtio-9p", v);
v9fs_device_unrealize_common(s, errp);
}
@@ -175,6 +168,8 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov,
/* virtio-9p device */
VMSTATE_VIRTIO_DEVICE(9p, 1, virtio_9p_load, virtio_vmstate_save);
static Property virtio_9p_properties[] = {
DEFINE_PROP_STRING("mount_tag", V9fsVirtioState, state.fsconf.tag),
DEFINE_PROP_STRING("fsdev", V9fsVirtioState, state.fsconf.fsdev_id),
@@ -187,6 +182,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_9p_properties;
dc->vmsd = &vmstate_virtio_9p;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
vdc->realize = virtio_9p_device_realize;
vdc->unrealize = virtio_9p_device_unrealize;

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/acpi/memory_hotplug.c
mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/alpha/pci.c
alpha_pci_iack_write(void) ""

View File

@@ -824,7 +824,6 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
int i;
dev = qdev_create(NULL, TYPE_TYPHOON_PCI_HOST_BRIDGE);
qdev_init_nofail(dev);
s = TYPHOON_PCI_HOST_BRIDGE(dev);
phb = PCI_HOST_BRIDGE(dev);
@@ -889,6 +888,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
&s->pchip.reg_mem, &s->pchip.reg_io,
0, 64, TYPE_PCI_BUS);
phb->bus = b;
qdev_init_nofail(dev);
/* Host memory as seen from the PCI side, via the IOMMU. */
memory_region_init_iommu(&s->pchip.iommu, OBJECT(s), &typhoon_iommu_ops,

View File

@@ -27,6 +27,7 @@
#define AST2400_FMC_BASE 0X1E620000
#define AST2400_SPI_BASE 0X1E630000
#define AST2400_VIC_BASE 0x1E6C0000
#define AST2400_SDMC_BASE 0x1E6E0000
#define AST2400_SCU_BASE 0x1E6E2000
#define AST2400_TIMER_BASE 0x1E782000
#define AST2400_I2C_BASE 0x1E78A000
@@ -97,6 +98,12 @@ static void ast2400_init(Object *obj)
object_initialize(&s->spi, sizeof(s->spi), "aspeed.smc.spi");
object_property_add_child(obj, "spi", OBJECT(&s->spi), NULL);
qdev_set_parent_bus(DEVICE(&s->spi), sysbus_get_default());
object_initialize(&s->sdmc, sizeof(s->sdmc), TYPE_ASPEED_SDMC);
object_property_add_child(obj, "sdmc", OBJECT(&s->sdmc), NULL);
qdev_set_parent_bus(DEVICE(&s->sdmc), sysbus_get_default());
qdev_prop_set_uint32(DEVICE(&s->sdmc), "silicon-rev",
AST2400_A0_SILICON_REV);
}
static void ast2400_realize(DeviceState *dev, Error **errp)
@@ -183,6 +190,14 @@ static void ast2400_realize(DeviceState *dev, Error **errp)
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi), 0, AST2400_SPI_BASE);
sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi), 1, AST2400_SPI_FLASH_BASE);
/* SDMC - SDRAM Memory Controller */
object_property_set_bool(OBJECT(&s->sdmc), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->sdmc), 0, AST2400_SDMC_BASE);
}
static void ast2400_class_init(ObjectClass *oc, void *data)

View File

@@ -378,7 +378,7 @@ static void eth_cleanup(NetClientState *nc)
}
static NetClientInfo net_mv88w8618_info = {
.type = NET_CLIENT_OPTIONS_KIND_NIC,
.type = NET_CLIENT_DRIVER_NIC,
.size = sizeof(NICState),
.receive = eth_receive,
.cleanup = eth_cleanup,

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/arm/virt-acpi-build.c
virt_acpi_setup(void) "No fw cfg or ACPI disabled. Bailing out."

View File

@@ -53,7 +53,7 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus)
uint16_t i;
for (i = 0; i < smp_cpus; i++) {
Aml *dev = aml_device("C%03x", i);
Aml *dev = aml_device("C%.03X", i);
aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007")));
aml_append(dev, aml_name_decl("_UID", aml_int(i)));
aml_append(scope, dev);

View File

@@ -60,6 +60,8 @@ typedef struct GUSState {
int64_t last_ticks;
qemu_irq pic;
IsaDma *isa_dma;
PortioList portio_list1;
PortioList portio_list2;
} GUSState;
static uint32_t gus_readb(void *opaque, uint32_t nport)
@@ -265,9 +267,10 @@ static void gus_realizefn (DeviceState *dev, Error **errp)
s->samples = AUD_get_buffer_size_out (s->voice) >> s->shift;
s->mixbuf = g_malloc0 (s->samples << s->shift);
isa_register_portio_list (d, s->port, gus_portio_list1, s, "gus");
isa_register_portio_list (d, (s->port + 0x100) & 0xf00,
gus_portio_list2, s, "gus");
isa_register_portio_list(d, &s->portio_list1, s->port,
gus_portio_list1, s, "gus");
isa_register_portio_list(d, &s->portio_list2, (s->port + 0x100) & 0xf00,
gus_portio_list2, s, "gus");
s->isa_dma = isa_get_dma(isa_bus_from_device(d), s->emu.gusdma);
k = ISADMA_GET_CLASS(s->isa_dma);

View File

@@ -106,6 +106,7 @@ typedef struct SB16State {
/* mixer state */
int mixer_nreg;
uint8_t mixer_regs[256];
PortioList portio_list;
} SB16State;
static void SB_audio_callback (void *opaque, int free);
@@ -1378,7 +1379,8 @@ static void sb16_realizefn (DeviceState *dev, Error **errp)
dolog ("warning: Could not create auxiliary timer\n");
}
isa_register_portio_list (isadev, s->port, sb16_ioport_list, s, "sb16");
isa_register_portio_list(isadev, &s->portio_list, s->port,
sb16_ioport_list, s, "sb16");
s->isa_hdma = isa_get_dma(isa_bus_from_device(isadev), s->hdma);
k = ISADMA_GET_CLASS(s->isa_hdma);

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/audio/cs4231.c
cs4231_mem_readl_dreg(uint32_t reg, uint32_t ret) "read dreg %d: 0x%02x"

View File

@@ -692,6 +692,7 @@ struct FDCtrl {
/* Timers state */
uint8_t timer0;
uint8_t timer1;
PortioList portio_list;
};
static FloppyDriveType get_fallback_drive_type(FDrive *drv)
@@ -2495,7 +2496,8 @@ static void isabus_fdc_realize(DeviceState *dev, Error **errp)
FDCtrl *fdctrl = &isa->state;
Error *err = NULL;
isa_register_portio_list(isadev, isa->iobase, fdc_portio_list, fdctrl,
isa_register_portio_list(isadev, &fdctrl->portio_list,
isa->iobase, fdc_portio_list, fdctrl,
"fdc");
isa_init_irq(isadev, &fdctrl->irq, isa->irq);

View File

@@ -1189,9 +1189,9 @@ static Property m25p80_properties[] = {
};
static const VMStateDescription vmstate_m25p80 = {
.name = "xilinx_spi",
.version_id = 3,
.minimum_version_id = 1,
.name = "m25p80",
.version_id = 0,
.minimum_version_id = 0,
.pre_save = m25p80_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT8(state, Flash),
@@ -1200,20 +1200,19 @@ static const VMStateDescription vmstate_m25p80 = {
VMSTATE_UINT32(pos, Flash),
VMSTATE_UINT8(needed_bytes, Flash),
VMSTATE_UINT8(cmd_in_progress, Flash),
VMSTATE_UNUSED(4),
VMSTATE_UINT32(cur_addr, Flash),
VMSTATE_BOOL(write_enable, Flash),
VMSTATE_BOOL_V(reset_enable, Flash, 2),
VMSTATE_UINT8_V(ear, Flash, 2),
VMSTATE_BOOL_V(four_bytes_address_mode, Flash, 2),
VMSTATE_UINT32_V(nonvolatile_cfg, Flash, 2),
VMSTATE_UINT32_V(volatile_cfg, Flash, 2),
VMSTATE_UINT32_V(enh_volatile_cfg, Flash, 2),
VMSTATE_BOOL_V(quad_enable, Flash, 3),
VMSTATE_UINT8_V(spansion_cr1nv, Flash, 3),
VMSTATE_UINT8_V(spansion_cr2nv, Flash, 3),
VMSTATE_UINT8_V(spansion_cr3nv, Flash, 3),
VMSTATE_UINT8_V(spansion_cr4nv, Flash, 3),
VMSTATE_BOOL(reset_enable, Flash),
VMSTATE_UINT8(ear, Flash),
VMSTATE_BOOL(four_bytes_address_mode, Flash),
VMSTATE_UINT32(nonvolatile_cfg, Flash),
VMSTATE_UINT32(volatile_cfg, Flash),
VMSTATE_UINT32(enh_volatile_cfg, Flash),
VMSTATE_BOOL(quad_enable, Flash),
VMSTATE_UINT8(spansion_cr1nv, Flash),
VMSTATE_UINT8(spansion_cr2nv, Flash),
VMSTATE_UINT8(spansion_cr3nv, Flash),
VMSTATE_UINT8(spansion_cr4nv, Flash),
VMSTATE_END_OF_LIST()
}
};

View File

@@ -469,19 +469,22 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
return NVME_SUCCESS;
}
static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
{
uint64_t prp1 = le64_to_cpu(c->prp1);
uint64_t prp2 = le64_to_cpu(c->prp2);
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
prp1, prp2);
}
static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)cmd;
uint32_t cns = le32_to_cpu(c->cns);
uint32_t nsid = le32_to_cpu(c->nsid);
uint64_t prp1 = le64_to_cpu(c->prp1);
uint64_t prp2 = le64_to_cpu(c->prp2);
if (cns) {
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
prp1, prp2);
}
if (nsid == 0 || nsid > n->num_namespaces) {
return NVME_INVALID_NSID | NVME_DNR;
}
@@ -491,6 +494,48 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
prp1, prp2);
}
static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
{
static const int data_len = 4096;
uint32_t min_nsid = le32_to_cpu(c->nsid);
uint64_t prp1 = le64_to_cpu(c->prp1);
uint64_t prp2 = le64_to_cpu(c->prp2);
uint32_t *list;
uint16_t ret;
int i, j = 0;
list = g_malloc0(data_len);
for (i = 0; i < n->num_namespaces; i++) {
if (i < min_nsid) {
continue;
}
list[j++] = cpu_to_le32(i + 1);
if (j == data_len / sizeof(uint32_t)) {
break;
}
}
ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2);
g_free(list);
return ret;
}
static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
{
NvmeIdentify *c = (NvmeIdentify *)cmd;
switch (le32_to_cpu(c->cns)) {
case 0x00:
return nvme_identify_ns(n, c);
case 0x01:
return nvme_identify_ctrl(n, c);
case 0x02:
return nvme_identify_nslist(n, c);
default:
return NVME_INVALID_FIELD | NVME_DNR;
}
}
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
@@ -909,7 +954,7 @@ static void nvme_class_init(ObjectClass *oc, void *data)
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
pc->vendor_id = PCI_VENDOR_ID_INTEL;
pc->device_id = 0x5845;
pc->revision = 1;
pc->revision = 2;
pc->is_express = 1;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/block/virtio-blk.c
virtio_blk_req_complete(void *req, int status) "req %p status %d"

View File

@@ -654,15 +654,20 @@ static void virtio_blk_reset(VirtIODevice *vdev)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
AioContext *ctx;
VirtIOBlockReq *req;
/*
* This should cancel pending requests, but can't do nicely until there
* are per-device request lists.
*/
ctx = blk_get_aio_context(s->blk);
aio_context_acquire(ctx);
blk_drain(s->blk);
/* We drop queued requests after blk_drain() because blk_drain() itself can
* produce them. */
while (s->rq) {
req = s->rq;
s->rq = req->next;
virtio_blk_free_request(req);
}
if (s->dataplane) {
virtio_blk_data_plane_stop(s->dataplane);
}
@@ -798,7 +803,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
}
}
static void virtio_blk_save(QEMUFile *f, void *opaque)
static void virtio_blk_save(QEMUFile *f, void *opaque, size_t size)
{
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
@@ -823,15 +828,12 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
qemu_put_sbyte(f, 0);
}
static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
static int virtio_blk_load(QEMUFile *f, void *opaque, size_t size)
{
VirtIOBlock *s = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(s);
if (version_id != 2)
return -EINVAL;
return virtio_load(vdev, f, version_id);
return virtio_load(vdev, f, 2);
}
static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -880,7 +882,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
VirtIOBlock *s = VIRTIO_BLK(dev);
VirtIOBlkConf *conf = &s->conf;
Error *err = NULL;
static int virtio_blk_id;
unsigned i;
if (!conf->conf.blk) {
@@ -914,7 +915,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
for (i = 0; i < conf->num_queues; i++) {
virtio_add_queue(vdev, 128, virtio_blk_handle_output);
virtio_add_queue_aio(vdev, 128, virtio_blk_handle_output);
}
virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
if (err != NULL) {
@@ -924,8 +925,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
}
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
virtio_blk_save, virtio_blk_load, s);
blk_set_dev_ops(s->blk, &virtio_block_ops, s);
blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
@@ -940,7 +939,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
virtio_blk_data_plane_destroy(s->dataplane);
s->dataplane = NULL;
qemu_del_vm_change_state_handler(s->change);
unregister_savevm(dev, "virtio-blk", s);
blockdev_mark_auto_del(s->blk);
virtio_cleanup(vdev);
}
@@ -958,6 +956,8 @@ static void virtio_blk_instance_init(Object *obj)
DEVICE(obj), NULL);
}
VMSTATE_VIRTIO_DEVICE(blk, 2, virtio_blk_load, virtio_blk_save);
static Property virtio_blk_properties[] = {
DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf),
DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf),
@@ -979,6 +979,7 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_blk_properties;
dc->vmsd = &vmstate_virtio_blk;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
vdc->realize = virtio_blk_device_realize;
vdc->unrealize = virtio_blk_device_unrealize;

View File

@@ -574,9 +574,10 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
struct blkif_request_discard *discard_req = (void *)&ioreq->req;
ioreq->aio_inflight++;
blk_aio_discard(blkdev->blk,
discard_req->sector_number, discard_req->nr_sectors,
qemu_aio_complete, ioreq);
blk_aio_pdiscard(blkdev->blk,
discard_req->sector_number << BDRV_SECTOR_BITS,
discard_req->nr_sectors << BDRV_SECTOR_BITS,
qemu_aio_complete, ioreq);
break;
}
default:
@@ -975,14 +976,16 @@ static int blk_connect(struct XenDevice *xendev)
blkdev->feature_persistent = !!pers;
}
blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
if (blkdev->xendev.protocol) {
if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
blkdev->protocol = BLKIF_PROTOCOL_X86_32;
}
if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
blkdev->protocol = BLKIF_PROTOCOL_X86_64;
}
if (!blkdev->xendev.protocol) {
blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
} else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_NATIVE) == 0) {
blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
} else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
blkdev->protocol = BLKIF_PROTOCOL_X86_32;
} else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
blkdev->protocol = BLKIF_PROTOCOL_X86_64;
} else {
blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
}
blkdev->sring = xengnttab_map_grant_ref(blkdev->xendev.gnttabdev,

View File

@@ -80,6 +80,7 @@ typedef struct ParallelState {
uint32_t last_read_offset; /* For debugging */
/* Memory-mapped interface */
int it_shift;
PortioList portio_list;
} ParallelState;
#define TYPE_ISA_PARALLEL "isa-parallel"
@@ -532,7 +533,7 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp)
s->status = dummy;
}
isa_register_portio_list(isadev, base,
isa_register_portio_list(isadev, &s->portio_list, base,
(s->hw_driver
? &isa_parallel_portio_hw_list[0]
: &isa_parallel_portio_sw_list[0]),

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/char/virtio-serial-bus.c
virtio_serial_send_control_event(unsigned int port, uint16_t event, uint16_t value) "port %u, event %u, value %u"

View File

@@ -85,8 +85,9 @@ static void set_guest_connected(VirtIOSerialPort *port, int guest_connected)
{
VirtConsole *vcon = VIRTIO_CONSOLE(port);
DeviceState *dev = DEVICE(port);
VirtIOSerialPortClass *k = VIRTIO_SERIAL_PORT_GET_CLASS(port);
if (vcon->chr) {
if (vcon->chr && !k->is_console) {
qemu_chr_fe_set_open(vcon->chr, guest_connected);
}
@@ -156,9 +157,25 @@ static void virtconsole_realize(DeviceState *dev, Error **errp)
}
if (vcon->chr) {
vcon->chr->explicit_fe_open = 1;
qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read, chr_event,
vcon);
/*
* For consoles we don't block guest data transfer just
* because nothing is connected - we'll just let it go
* whetherever the chardev wants - /dev/null probably.
*
* For serial ports we need 100% reliable data transfer
* so we use the opened/closed signals from chardev to
* trigger open/close of the device
*/
if (k->is_console) {
vcon->chr->explicit_fe_open = 0;
qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read,
NULL, vcon);
virtio_serial_open(port);
} else {
vcon->chr->explicit_fe_open = 1;
qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read,
chr_event, vcon);
}
}
}

View File

@@ -594,12 +594,6 @@ static void vser_reset(VirtIODevice *vdev)
guest_reset(vser);
}
static void virtio_serial_save(QEMUFile *f, void *opaque)
{
/* The virtio device */
virtio_save(VIRTIO_DEVICE(opaque), f);
}
static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f)
{
VirtIOSerial *s = VIRTIO_SERIAL(vdev);
@@ -685,7 +679,7 @@ static void virtio_serial_post_load_timer_cb(void *opaque)
s->post_load = NULL;
}
static int fetch_active_ports_list(QEMUFile *f, int version_id,
static int fetch_active_ports_list(QEMUFile *f,
VirtIOSerial *s, uint32_t nr_active_ports)
{
uint32_t i;
@@ -702,6 +696,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
/* Items in struct VirtIOSerialPort */
for (i = 0; i < nr_active_ports; i++) {
VirtIOSerialPort *port;
uint32_t elem_popped;
uint32_t id;
id = qemu_get_be32(f);
@@ -714,37 +709,29 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
s->post_load->connected[i].port = port;
s->post_load->connected[i].host_connected = qemu_get_byte(f);
if (version_id > 2) {
uint32_t elem_popped;
qemu_get_be32s(f, &elem_popped);
if (elem_popped) {
qemu_get_be32s(f, &port->iov_idx);
qemu_get_be64s(f, &port->iov_offset);
qemu_get_be32s(f, &elem_popped);
if (elem_popped) {
qemu_get_be32s(f, &port->iov_idx);
qemu_get_be64s(f, &port->iov_offset);
port->elem =
qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
port->elem =
qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
/*
* Port was throttled on source machine. Let's
* unthrottle it here so data starts flowing again.
*/
virtio_serial_throttle_port(port, false);
}
/*
* Port was throttled on source machine. Let's
* unthrottle it here so data starts flowing again.
*/
virtio_serial_throttle_port(port, false);
}
}
timer_mod(s->post_load->timer, 1);
return 0;
}
static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id)
static int virtio_serial_load(QEMUFile *f, void *opaque, size_t size)
{
if (version_id > 3) {
return -EINVAL;
}
/* The virtio device */
return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
return virtio_load(VIRTIO_DEVICE(opaque), f, 3);
}
static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -756,10 +743,6 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
int ret;
uint32_t tmp;
if (version_id < 2) {
return 0;
}
/* Unused */
qemu_get_be16s(f, (uint16_t *) &tmp);
qemu_get_be16s(f, (uint16_t *) &tmp);
@@ -781,7 +764,7 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
qemu_get_be32s(f, &nr_active_ports);
if (nr_active_ports) {
ret = fetch_active_ports_list(f, version_id, s, nr_active_ports);
ret = fetch_active_ports_list(f, s, nr_active_ports);
if (ret) {
return ret;
}
@@ -1049,13 +1032,6 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp)
vser->post_load = NULL;
/*
* Register for the savevm section with the virtio-console name
* to preserve backward compat
*/
register_savevm(dev, "virtio-console", -1, 3, virtio_serial_save,
virtio_serial_load, vser);
QLIST_INSERT_HEAD(&vserdevices.devices, vser, next);
}
@@ -1086,8 +1062,6 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
QLIST_REMOVE(vser, next);
unregister_savevm(dev, "virtio-console", vser);
g_free(vser->ivqs);
g_free(vser->ovqs);
g_free(vser->ports_map);
@@ -1100,6 +1074,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
virtio_cleanup(vdev);
}
/* Note: 'console' is used for backwards compatibility */
VMSTATE_VIRTIO_DEVICE(console, 3, virtio_serial_load, virtio_vmstate_save);
static Property virtio_serial_properties[] = {
DEFINE_PROP_UINT32("max_ports", VirtIOSerial, serial.max_virtserial_ports,
31),
@@ -1115,6 +1092,7 @@ static void virtio_serial_class_init(ObjectClass *klass, void *data)
QLIST_INIT(&vserdevices.devices);
dc->props = virtio_serial_properties;
dc->vmsd = &vmstate_virtio_console;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
vdc->realize = virtio_serial_device_realize;
vdc->unrealize = virtio_serial_device_unrealize;

View File

@@ -78,8 +78,7 @@ static void qbus_realize(BusState *bus, DeviceState *parent, const char *name)
{
const char *typename = object_get_typename(OBJECT(bus));
BusClass *bc;
char *buf;
int i, len, bus_id;
int i, bus_id;
bus->parent = parent;
@@ -88,23 +87,15 @@ static void qbus_realize(BusState *bus, DeviceState *parent, const char *name)
} else if (bus->parent && bus->parent->id) {
/* parent device has id -> use it plus parent-bus-id for bus name */
bus_id = bus->parent->num_child_bus;
len = strlen(bus->parent->id) + 16;
buf = g_malloc(len);
snprintf(buf, len, "%s.%d", bus->parent->id, bus_id);
bus->name = buf;
bus->name = g_strdup_printf("%s.%d", bus->parent->id, bus_id);
} else {
/* no id -> use lowercase bus type plus global bus-id for bus name */
bc = BUS_GET_CLASS(bus);
bus_id = bc->automatic_ids++;
len = strlen(typename) + 16;
buf = g_malloc(len);
len = snprintf(buf, len, "%s.%d", typename, bus_id);
for (i = 0; i < len; i++) {
buf[i] = qemu_tolower(buf[i]);
bus->name = g_strdup_printf("%s.%d", typename, bus_id);
for (i = 0; bus->name[i]; i++) {
bus->name[i] = qemu_tolower(bus->name[i]);
}
bus->name = buf;
}
if (bus->parent) {
@@ -229,7 +220,7 @@ static void qbus_finalize(Object *obj)
{
BusState *bus = BUS(obj);
g_free((char *)bus->name);
g_free(bus->name);
}
static const TypeInfo bus_info = {

View File

@@ -65,6 +65,9 @@ static void machine_set_kernel_irqchip(Object *obj, Visitor *v,
ms->kernel_irqchip_split = true;
break;
default:
/* The value was checked in visit_type_OnOffSplit() above. If
* we get here, then something is wrong in QEMU.
*/
abort();
}
}
@@ -558,6 +561,7 @@ static void machine_class_finalize(ObjectClass *klass, void *data)
if (mc->compat_props) {
g_array_free(mc->compat_props, true);
}
g_free(mc->name);
}
void machine_register_compat_props(MachineState *machine)

View File

@@ -126,7 +126,16 @@ static void release_drive(Object *obj, const char *name, void *opaque)
static char *print_drive(void *ptr)
{
return g_strdup(blk_name(ptr));
const char *name;
name = blk_name(ptr);
if (!*name) {
BlockDriverState *bs = blk_bs(ptr);
if (bs) {
name = bdrv_get_node_name(bs);
}
}
return g_strdup(name);
}
static void get_drive(Object *obj, Visitor *v, const char *name, void *opaque,
@@ -247,7 +256,7 @@ static void set_netdev(Object *obj, Visitor *v, const char *name,
}
queues = qemu_find_net_clients_except(str, peers,
NET_CLIENT_OPTIONS_KIND_NIC,
NET_CLIENT_DRIVER_NIC,
MAX_QUEUE_NUM);
if (queues == 0) {
err = -ENOENT;

View File

@@ -1084,7 +1084,7 @@ int qdev_prop_check_globals(void)
}
static void qdev_prop_set_globals_for_type(DeviceState *dev,
const char *typename)
const char *typename)
{
GList *l;
@@ -1100,7 +1100,7 @@ static void qdev_prop_set_globals_for_type(DeviceState *dev,
if (err != NULL) {
error_prepend(&err, "can't apply global %s.%s=%s: ",
prop->driver, prop->property, prop->value);
if (prop->errp) {
if (!dev->hotplugged && prop->errp) {
error_propagate(prop->errp, err);
} else {
assert(prop->user_provided);

View File

@@ -354,12 +354,14 @@ void qdev_init_nofail(DeviceState *dev)
assert(!dev->realized);
object_ref(OBJECT(dev));
object_property_set_bool(OBJECT(dev), true, "realized", &err);
if (err) {
error_reportf_err(err, "Initialization of device %s failed: ",
object_get_typename(OBJECT(dev)));
exit(1);
}
object_unref(OBJECT(dev));
}
void qdev_machine_creation_done(void)
@@ -885,6 +887,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
HotplugHandler *hotplug_ctrl;
BusState *bus;
Error *local_err = NULL;
bool unattached_parent = false;
static int unattached_count;
if (dev->hotplugged && !dc->hotpluggable) {
error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
@@ -893,12 +897,12 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
if (value && !dev->realized) {
if (!obj->parent) {
static int unattached_count;
gchar *name = g_strdup_printf("device[%d]", unattached_count++);
object_property_add_child(container_get(qdev_get_machine(),
"/unattached"),
name, obj, &error_abort);
unattached_parent = true;
g_free(name);
}
@@ -987,6 +991,10 @@ post_realize_fail:
fail:
error_propagate(errp, local_err);
if (unattached_parent) {
object_unparent(OBJECT(dev));
unattached_count--;
}
}
static bool device_get_hotpluggable(Object *obj, Error **errp)

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# hw/display/jazz_led.c
jazz_led_read(uint64_t addr, uint8_t val) "read addr=0x%"PRIx64": 0x%x"

Some files were not shown because too many files have changed in this diff Show More